# Description ------------------------------------------------------------------

### This file reproduces the contents of the quantitative Appendices (D - O)

### NOTE: Some of the .tex tables produced by this code have been slightly
### modified for aesthetic purposes before being placed in the Appendix.

### POTENTIALLY CONFUSING MODEL NAMES EXPLANATION 
### Some of the model object names are tied to hypotheses (start with
### h# as in h4) and some are tied to the table in which they are reported (such
### as i2 for a model reported in table I2). In either case, subsequent numbers
### and letters uniquely identify a model (within the table and, in conjunction
### with the table or hypothesis prefix, globally). We apologize for any 
### confusion but we had to shorten model object names considerably because of 
### a bug in Stargazer v. 5.2.3 that causes errors for long model names.

# Setup ------------------------------------------------------------------------
out_path <- 'output/appendix/'

## Packages --------------------------------------------------------------------
#if packages are not installed, they must be installed with:
# install.packages("package_name"); The name of package must be in quotes
library(dplyr)
library(readr)
library(tidyr)
library(purrr)
library(tibble)
library(stringr)
library(ivreg)
library(broom)
library(forcats)
library(ggplot2)
library(car)
library(xtable)
library(stargazer)
library(multiwayvcov)
library(haven)
library(labelled)
library(lme4)
library(patchwork)

## Functions -------------------------------------------------------------------
source("scripts/0_functions/functions_analysis.R")

## Data ------------------------------------------------------------------------
raw_path <- 'data/1_raw/'
clean_path <- 'data/2_clean/'
formatted_path <- 'data/3_formatted/'

### CLEAN ----

#endline vendor survey data
load(paste0(clean_path, "vendor_end.Rdata"))

#baseline vendor survey data
load(paste0(clean_path, "vendor_base.Rdata"))

#tax collector endline survey data
load(paste0(clean_path, "tax_collector_end.RData"))

#tax collector baseline survey data
load(paste0(clean_path, "tax_collector_base.RData"))

# market month
load(paste0(clean_path, "month_data_merged.RData"))

# market only data
load(paste0(clean_path, "market_only_data.RData"))

# market scoping data
load(paste0(clean_path, "mkt_scoping_sample.RData"))

### FORMATTED ----

# market-level outcomes
load(paste0(formatted_path, "market_lvl.RData"))

# diffs in outcomes at market level
load(paste0(formatted_path, "market_lvl_diffs.RData"))

# additional market month analysis objects
load(paste0(formatted_path, "market_month_additional.RData"))

# spillover analysis objects
load(paste0(formatted_path, "spillovers.RDATA"))

### MAIN ANALYSIS MODELS
load("output/main/main_analysis_models.RData")

# Appendix D: Survey Descriptive Statistics ------------------------------------

### Note: clustered difference of means tests used except where indicated

#### Demographic Variables ----

##### Note: household income (hh_income_trim) rounded to nearest whole kwacha
##### after creation of the .tex files here

# Table D1
dem_stats_v_base <- create_sum_stats_tab(
  vendor_base, 
  treatment_status, 
  cluster_var = market, 
  age, 
  female,
  educ_num,
  literacy_any,
  hh_income_trim,
  service,
  sell_daily,
  yrs_in_mkt_fix,
  size = "scriptsize",
  digits = 3,
  var_names = c(
    "Age",
    "Female",
    "Education (ordinal)",
    "Literate",
    "\\makecell[l]{Household income\\\\(Malawian kwacha)}",
    "Vendor sells services",
    "Vendor sells daily",
    "Years selling in market"
  ),
  title = "Summary Stats for Demographic Variables Vendor Survey - Baseline",
  file = paste0(out_path, "tableD1.tex"))

# Table D2
dem_stats_v_end <- create_sum_stats_tab(
  vendor_end,
  treatment_status, 
  cluster_var = market, 
  age, 
  female,
  educ_num,
  literacy_any,
  hh_income_trim_99,
  service,
  sell_daily,
  yrs_in_mkt_fix,
  size = "scriptsize",
  digits = 3,
  var_names = c(
    "Age",
    "Female",
    "Education (ordinal)",
    "Literate",
    "\\makecell[l]{Household income\\\\(Malawian kwacha)}",
    "Vendor sells services",
    "Vendor sells daily",
    "Years selling in market"
  ),
  title = "Summary Stats for Demographic Variables Vendor Survey - Endline",
  file = paste0(out_path, "tableD2.tex"))

dem_stats_tc_base <- create_sum_stats_tab(
  tax_collector_base, 
  treatment_status,
  age,
  female,
  educ_num,
  literacy_any,
  hh_income,
  days_wrk_mkt,
  no_english,
  size = "scriptsize",
  digits = 3,
  cluster = F,
  var_names = c(
    "Age",
    "Female",
    "Education (ordinal)",
    "Literate",
    "\\makecell[l]{Household income\\\\(Malawian kwacha)}",
    "Days worked in market",
    "Does not speak English"
  ),
  title = "Summary Stats for Demographic Variables TC Survey - Baseline",
  file = paste0(out_path, "tableD3.tex"))

dem_stats_tc_end <- create_sum_stats_tab(
  tax_collector_end,
  treatment_status,
  age,
  female,
  educ_num,
  literacy_any,
  hh_income,
  days_wrk_mkt,
  no_english,
  size = "scriptsize",
  digits = 3,
  cluster = F,
  var_names = c(
    "Age",
    "Female",
    "Education (ordinal)",
    "Literate",
    "\\makecell[l]{Household income\\\\(Malawian kwacha)}",
    "Days worked in market",
    "Does not speak English"
  ),
  title = "Summary Stats for Demographic Variables TC Survey - Endline",
  file = paste0(out_path, "tableD4.tex"))

#### Outcome Variables ----

ov_stats_v_base <- create_sum_stats_tab(
  vendor_base, 
  treatment_status,
  cluster_var = market, 
  fee1_full, 
  fee2_always, recent_receipt_7, 
  no_rcpt_when_pay_num,
  tr1_num, tr2_num,
  tr9e_num, ms1_num, ms3_num, ms4_num,
  ms5_num, ms6_num, satisfaction_dev_num, 
  ms_average, tc2_10_clean,
  tax_morale_num, tc2_4b_num, tc5a_num, 
  tc5b_num,
  tc2_15b_num,
  petition,
  petition_wname,
  digits = 3,
  var_names = c(
    "Self-reported compliance",
    "Perceived group compliance",
    "Ability to produce receipt",
    "No receipt when paying",
    "Trust in Local Gov.",
    "Trust in Ward Cllr.",
    "District Manages Funds Well",
    "Satisfaction w/ Water Access",
    "`` '' Garbage Collection",
    "`` '' Pathways Conditions",
    "`` '' Stall Conditions",
    " `` '' Security",
    "`` '' Developments in Mkt",
    "Respondent Avg. Services Satisf.",
    "\\makecell[l]{Perception of Relative\\\\Amount Spent on Services\\\\(Malawian kwacha)}",
    "Vendors Should Pay Taxes",
    "Paying Taxes is a Duty",
    "Ind'l Evasion Possible",
    "Group Evasion Possible",
    "Pay Because Consequences",
    "Sign Petition Anon.",
    "Sign Petition w/ Name"
  ),
  title = "Summary Stats for Outcome Variables Vendor Survey - Baseline",
  file = paste0(out_path, "tableD5.tex"))

ov_stats_v_end <- create_sum_stats_tab(
  vendor_end,
  treatment_status, 
  cluster_var = market, 
  fee1_full, 
  fee2_always, recent_receipt_7, 
  no_rcpt_when_pay_num,
  tr1_num, tr2_num,
  tr9e_num, tr9g_num, tr9h_num,
  ms1_num, ms3_num, ms4_num,
  ms5_num, ms6_num, satisfaction_dev_num, 
  ms_average, tc2_10_clean, tc9_clean,
  pay_even_disagree, tc2_4b_num, tc5a_num, 
  tc5b_num,
  #tc2_15b_num,
  petition,
  petition_wname,
  stmt1_agree,
  stmt1_agree_sent,
  stmt2_agree,
  stmt2_agree_sent,
  digits = 3,
  var_names = c(
    "Self-reported compliance",
    "Perceived group compliance",
    "Ability to produce receipt",
    "No receipt when paying",
    "Trust in Local Gov.",
    "Trust in Ward Cllr.",
    "District Manages Funds Well",
    "District Spending Transparent",
    "District Tax Collection Transparent",
    "Satisfaction w/ Water Access",
    "`` '' Garbage Collection",
    "`` '' Pathways Conditions",
    "`` '' Stall Conditions",
    " `` '' Security",
    "`` '' Developments in Mkt",
    "Respondent Avg. Services Satisf.",
    "\\makecell[l]{Perception of Relative\\\\Amount Spent on Services\\\\(Malawian kwacha)}",
    "\\makecell[l]{Perception of Relative\\\\Amount of Taxes Reaching Gov't\\\\(Malawian kwacha)}",
    "Vendors Should Pay Taxes",
    "Paying Taxes is a Duty",
    "Ind'l Evasion Possible",
    "Group Evasion Possible",
    # Pay Because Consequences here
    "Sign Petition Anon.",
    "Sign Petition w/ Name",
    "Reduce Gov. Overreach (Agree)",
    "Reduce Gov. Overreach (SMS)",
    "Increase Gov. Effectiv. (Agree)",
    "Increase Gov. Effectiv. (SMS)"
  ),
  title = "Summary Stats for Outcome Variables Vendor Survey - Endline",
  file = paste0(out_path, "tableD6.tex"))

# Chitenje has only 1 observation for tc2_15b_num (Pay Because Consequences), 
# which doesn't work with cluster.vcov, so we use the unclustered version 
# the summary table function and then paste the line into the table into Table
# D6 in the appendix
ov_stats_v_end_tc2_15b_num <- create_sum_stats_tab(
  vendor_end, 
  treatment_status,
  cluster = FALSE,
  tc2_15b_num,
  digits = 3,,
  var_names = c(
    "\\makecell[l]{Pay Because Consequences\\\\(Unclustered $t$-Test)}"
  ),
  file = paste0(out_path, "tableD6_addition.tex"),
  title = "Summary Stats for Outcome Variables Vendor Survey - Endline")

ov_sum_stats_tc_base <- create_sum_stats_tab(
  tax_collector_base, 
  treatment_status, 
  hrs_in_mkt,
  vendors_visited_trim_99,
  digits = 3, 
  title = "Summary Stats for Outcome Variables Tax Collector Survey - Baseline",
  var_names = c("Hours in Market",
                "Vendors Visited"),
  cluster = F,
  file = paste0(out_path, "tableD7.tex"))

ov_sum_stats_tc_end <- create_sum_stats_tab(
  tax_collector_end, 
  treatment_status, 
  hrs_in_mkt_fix,
  vendors_visited_trim_99,
  digits = 3, 
  title = "Summary Stats for Outcome Variables Tax Collector Survey - Endline",
  var_names = c("Hours in Market",
                "Vendors Visited"),
  cluster = F,
  file = paste0(out_path, "tableD8.tex"))


# Appendix E: Revenue Results -------------------------------------------------

## Figure E1 ----

market_month |> 
  filter(year_month != "2018_12") |> 
  mutate(year_month = fct_relabel(year_month,
                                  ~ str_replace(., "_", "/")),
         treatment_status = factor(treatment_status,
                                   levels = c("Control", "BOTH", "BU", "TD"))) |> 
  plot_revenue_tgs(market_fees_collected_cl_no0s_fix2_st,
                   conf_int = F, point_size = 3) +
  labs(y = "Market Fee Units",
       x = "Year and Month") + 
  scale_color_viridis_d(name = "Treatment Group",
                        begin = 0,
                        end = 0.75,
                        option = "magma") +
  scale_shape_discrete(name = "Treatment Group")
ggsave(paste0(out_path, "figureE1.png"),
       width = 6.2, height = 4, units = "in")

## Table E1 ----

### Models ----

### Market DIM Fee Units
h2_dim <- lm(
  data = market_month_nov18, 
  market_fees_collected_cl_no0s_st ~ BU + TD + Both + block_id)


### Market DID
h2_did_diffs_st_nov <- lm(
  data = market_month_diffs,
  nov18_nov17_st ~ BU + TD + Both + block_id)

### Tex Table ----
stargazer(h2_dim,  h2_did_diffs_st_nov,
          keep = c("BU", "TD", "Both"),
          column.labels = c("Market DIM", "Market DID"), 
          title = "Treatment Effects on Market Fee Units",
          model.numbers = F,
          notes = c("Market-level models include block fixed-effects."),
          keep.stat = c("n", "adj.rsq"),
          dep.var.labels.include = FALSE,
          notes.label = "Notes",
          star.cutoffs = c(.05, .01, .001),
          header = F,
          out = paste0(out_path, "tableE1.tex"))

# Appendix G: Mechanism Effects as Percent Increase Over Control ---------------

test_vars <- c("BU", "TD", "Both")

# collect all models in a list
sig_mech_models <- list(h4_dg,
                        h4_wc,
                        h6_ms10,    
                        h6_ms1,
                        h7_taxduty,
                        h9_tc2_15b,
                        h10_tc9,
                        h11_hrs_in_mkt)

# short convenience function to extract coefficients we care about
format_coefs <- function(x){
  out <- tidy(x) |> filter(term %in% test_vars)
  out$outs_reg <- deparse(formula(x)[[2]]) # get outcome variable from formula
  out
}

# bind all coefficient values we care about together
sig_mech_ests <- lapply(sig_mech_models,
                        format_coefs) |>
  list_rbind()

# correct p-value for clustering
## short convenience function for getting p-value from a model
get_correct_p <- function(x){
  
  calc_p_cluster(x, test_vars, cluster_data = get(as.character(x$call$data)))
  
}
sig_mech_ests$p.value <- unlist(lapply(sig_mech_models,
                                       get_correct_p))

# discarding non-significant effects
sig_mech_ests <- sig_mech_ests |> 
  filter(p.value <= 0.05)

# storing outcome names
outs <- sig_mech_ests$outs_reg |> unique()
# to make things easier, replace as.numeric() calls with _num versions of vars
outs_num <- str_replace(outs, "as.numeric\\(", "") |> 
  str_replace("_clean\\)", "_num")

# getting control group means for vendor vars
outs_mean_ven <- vendor_end |>
  filter( (BU == 0) & (TD == 0) & (Both == 0)) |> 
  summarise(across(any_of(outs_num), ~ mean(.x, na.rm = TRUE))) |> 
  pivot_longer(everything(), values_to = "mean", names_to = "outcome")
# getting control group means for tax collector variable
outs_mean_tc <- tax_collector_end |> 
  filter( (BU == 0) & (TD == 0) & (Both == 0)) |> 
  summarise(across(any_of(outs_num), ~ mean(.x, na.rm = TRUE))) |>
  pivot_longer(everything(), values_to = "mean", names_to = "outcome")
# combining
outs_mean <- bind_rows(outs_mean_ven, outs_mean_tc)
# adding original outcome names back in, for merging
outs_mean$outs_reg <- outs
# adding more interpretible outcome names in, for printing
outs_mean$outs_full <- c("Trust in Local Gov.",
                         "Trust in Ward Counc.",
                         "Services Satisfaction",
                         "Satisfaction with Water Access",
                         "Paying Tax as Duty",
                         "Pay Because Consequences",
                         "Money Flowing to Gov't",
                         "Hours [TC] Working in Market A Day")


# merging outs_means with sig_mech_ests
sig_mech_ests <- sig_mech_ests |> 
  left_join(outs_mean, by = "outs_reg")

# calculate percent increase
sig_mech_ests <- sig_mech_ests |> 
  mutate(per_inc = estimate/mean * 100)

# rearrange to make for nicer printing
sig_mech_ests_print <- sig_mech_ests |> 
  select(outs_full, term, estimate, per_inc, mean)

# rename columns for nicer printing
sig_mech_ests_print <- sig_mech_ests_print |> 
  rename(`\\textbf{Outcome}` = outs_full, 
         `\\textbf{Treatment Group}` = term,
         `\\textbf{Treatment Effect}` = estimate,
         `\\textbf{Percent Increase}` = per_inc,
         `\\textbf{Control Mean}` = mean)

print(xtable::xtable(sig_mech_ests_print, label = "app:tab:mech_per_inc",
                     align = c("l",
                               "p{0.25\\linewidth}",
                               "p{0.125\\linewidth}",
                               "p{0.125\\linewidth}",
                               "p{0.125\\linewidth}",
                               "p{0.125\\linewidth}"),
                     caption = "Treatment Effects as a Percent of Control Group",
                     digits = 3),
      comment = F,
      include.rownames = F,
      hline.after = 0:nrow(sig_mech_ests_print),
      sanitize.text.function = sanitize_allow_latex,
      sanitize.colnames.function = sanitize_allow_latex,
      caption.placement = "top",
      file = paste0(out_path, "tableG1.tex")
)  

# Appendix H: Trust and Engagement: Additional Results -------------------------

## Models ----

# regress trust on agreeing with statement for only BU treatment (BU + BOTH)
tr_gov_stmt1 <- lm(
  data = vendor_end |> filter(BU_treat == 1),
  as.numeric(tr1_clean) ~ stmt1_agree_sent) 
# effect as percent decrease
abs(coef(tr_gov_stmt1)[2])/coef(tr_gov_stmt1)[1] * 100

# refit regular regress, checking for interaction effect with trust
st1_el_trint <- lm(data = vendor_end, stmt1_agree_sent ~ 
                             (BU + TD + Both) * tr1_clean + 
                             block_id + as.factor(enum))

## Tex Table ----
stargazer(tr_gov_stmt1,
          st1_el_trint,
          se = list(calc.ses.cluster(tr_gov_stmt1, "market", 
                                     data = vendor_end |>
                                       filter(BU_treat == 1)),
                    calc.ses.cluster(st1_el_trint, "market", 
                                     data = vendor_end)),
          keep = c("BU", "TD", "Both", "stmt1_agree_sent"),
          title = "Political Engagement Outcomes",
          model.numbers = F,
          notes = c("Models include enumerator and block fixed-effects.",
                    "Models have SEs clustered on market."),
          keep.stat = c("n", "adj.rsq"),
          notes.label = "Notes:",
          dep.var.labels = c("Trust in Local Gov",
                             "Reduce Gov. Over-Reach"),
          label = "app:tab:behav_trust",
          star.cutoffs = c(.05, .01, .001),
          no.space = T,
          single.row = TRUE,
          header = F,
          covariate.labels = c(
            "Agreed to Send Message",
            NA,
            NA,
            "BOTH",
            "BU * Local Gov - Not Very Trustworthy",
            "BU * Local Gov - Somewhat Trustworthy",
            "BU * Local Gov - Very Trustworth",
            "TD * Local Gov - Not Very Trustworthy",
            "TD * Local Gov - Somewhat Trustworthy",
            "TD * Local Gov - Very Trustworth",
            "BOTH * Local Gov - Not Very Trustworthy",
            "BOTH * Local Gov - Somewhat Trustworthy",
            "BOTH * Local Gov - Very Trustworth"
          ),
          out = paste0(out_path, "tableH1.tex")
          )
# Appendix I: Spillovers -------------------------------------------------------

## Data Prep for Spillover Analysis ----
vendor_end_spill <- vendor_end |> 
  left_join(spills_Miguel, by = c("market" = "id")) |> 
  left_join(spills_IPW, by = c("market" = "id")) |> 
  left_join(spills_IPW_ind_mktlvl_bl, by = c("market" = "id")) |> 
  left_join(spills_IPW_mixed_mktlvl_bl, by = c("market" = "id"))

### IPW Intermediate Objects ----

#unadultered treatment conditions
main_treats <- c("000_000", "100_000",
                 "010_000", "001_000") 

#we can only compare main effects
##distance only
vendor_end_d2_main <- vendor_end_spill |> 
  filter(cond_d2 %in% main_treats &
           d2_000_000 > 0 & 
           d2_000_000 < 1 &
           d2_100_000 > 0 &
           d2_100_000 < 1 & 
           d2_010_000 > 0 &
           d2_010_000 < 1 &
           d2_001_000 > 0 &
           d2_001_000 < 1)

vendor_end_d5_main <- vendor_end_spill |> 
  filter(cond_d5 %in% main_treats &
           d5_000_000 > 0 & 
           d5_000_000 < 1 &
           d5_100_000 > 0 &
           d5_100_000 < 1 & 
           d5_010_000 > 0 &
           d5_010_000 < 1 &
           d5_001_000 > 0 &
           d5_001_000 < 1)

vendor_end_d10_main <- vendor_end_spill |> 
  filter(cond_d10 %in% main_treats &
           d10_000_000 > 0 & 
           d10_000_000 < 1 &
           d10_100_000 > 0 &
           d10_100_000 < 1 & 
           d10_010_000 > 0 &
           d10_010_000 < 1 &
           d10_001_000 > 0 &
           d10_001_000 < 1)

##distance + Baseline data
vendor_end_d2_mixed <- vendor_end_spill |> 
  filter(cond_d2_mix %in% main_treats &
           d2_000_000_mix > 0 & 
           d2_000_000_mix < 1 &
           d2_100_000_mix > 0 &
           d2_100_000_mix < 1 & 
           d2_010_000_mix > 0 &
           d2_010_000_mix < 1 &
           d2_001_000_mix > 0 &
           d2_001_000_mix < 1)

vendor_end_d5_mixed <- vendor_end_spill |> 
  filter(cond_d5_mix %in% main_treats &
           d5_000_000_mix > 0 & 
           d5_000_000_mix < 1 &
           d5_100_000_mix > 0 &
           d5_100_000_mix < 1 & 
           d5_010_000_mix > 0 &
           d5_010_000_mix < 1 &
           d5_001_000_mix > 0 &
           d5_001_000_mix < 1)

vendor_end_d10_mixed <- vendor_end_spill |> 
  filter(cond_d10_mix %in% main_treats &
           d10_000_000_mix > 0 & 
           d10_000_000_mix < 1 &
           d10_100_000_mix > 0 &
           d10_100_000_mix < 1 & 
           d10_010_000_mix > 0 &
           d10_010_000_mix < 1 &
           d10_001_000_mix > 0 &
           d10_001_000_mix < 1)


##individual data only:
vendor_end_ind <- vendor_end_spill |> 
  filter(cond_d1_ind %in% main_treats &
           d1_000_000_ind > 0 & 
           d1_000_000_ind < 1 &
           d1_100_000_ind > 0 &
           d1_100_000_ind < 1 & 
           d1_010_000_ind > 0 &
           d1_010_000_ind < 1 &
           d1_001_000_ind > 0 &
           d1_001_000_ind < 1) 

### Treatment Externalities Intermediate Objects ----
# for convenience, to make it possible to put diff models for each
# outcome into one graph
vendor_end_spill_daily <- vendor_end_spill |> 
  mutate(n_2_TD = ven_daily_wt_2_TD,
         n_5_TD = ven_daily_wt_5_TD,
         n_10_TD = ven_daily_wt_10_TD,
         n_2_BU = ven_daily_wt_2_BU,
         n_5_BU = ven_daily_wt_5_BU,
         n_10_BU = ven_daily_wt_10_BU,
         n_2_Both = ven_daily_wt_2_BOTH,
         n_5_Both = ven_daily_wt_5_BOTH,
         n_10_Both = ven_daily_wt_10_BOTH,
         n_2_all = ven_daily_wt_2_all,
         n_5_all = ven_daily_wt_5_all,
         n_10_all = ven_daily_wt_10_all)

vendor_end_spill_max <- vendor_end_spill |> 
  mutate(n_2_TD = ven_max_wt_2_TD,
         n_5_TD = ven_max_wt_5_TD,
         n_10_TD = ven_max_wt_10_TD,
         n_2_BU = ven_max_wt_2_BU,
         n_5_BU = ven_max_wt_5_BU,
         n_10_BU = ven_max_wt_10_BU,
         n_2_Both = ven_max_wt_2_BOTH,
         n_5_Both = ven_max_wt_5_BOTH,
         n_10_Both = ven_max_wt_10_BOTH,
         n_2_all = ven_max_wt_2_all,
         n_5_all = ven_max_wt_5_all,
         n_10_all = ven_max_wt_10_all)

vendor_end_spill_num <- vendor_end_spill |> 
  mutate(n_2_TD = number_2_TD,
         n_5_TD = number_5_TD,
         n_10_TD = number_10_TD,
         n_2_BU = number_2_BU,
         n_5_BU = number_5_BU,
         n_10_BU = number_10_BU,
         n_2_Both = number_2_BOTH,
         n_5_Both = number_5_BOTH,
         n_10_Both = number_10_BOTH,
         n_2_all = number_2_all,
         n_5_all = number_5_all,
         n_10_all = number_10_all)

## Appendix I.2: IPW Approach ----

### Table I1 ----

#### Models ----
i1_1 <- lm(fee1_full ~ BU + TD + Both + block_id + enum,
                       data = vendor_end_d2_main,
                       weights = 1/prob_d2) 
i1_2 <- lm(fee1_full ~ BU + TD + Both + block_id + enum,
                       data = vendor_end_d5_main,
                       weights = 1/prob_d5) 
i1_3 <- lm(fee1_full ~ BU + TD + Both + block_id + enum,
                        data = vendor_end_d10_main,
                        weights = 1/prob_d10) 

i1_1_mix <- lm(fee1_full ~ BU + TD + Both + block_id + enum,
                           data = vendor_end_d2_mixed,
                           weights = 1/prob_d2_mix) 
i1_2_mix <- lm(fee1_full ~ BU + TD + Both + block_id + enum,
                           data = vendor_end_d5_mixed,
                           weights = 1/prob_d5_mix) 
i1_3_mix <- lm(fee1_full ~ BU + TD + Both + block_id + enum,
                            data = vendor_end_d10_mixed,
                            weights = 1/prob_d10_mix) 

i1_4 <- lm(fee1_full ~ BU + TD + Both + block_id + enum,
                         data = vendor_end_ind,
                         weights = 1/prob_d1_ind) 

#### Tex Table ----
stargazer(i1_1,
          i1_1_mix,
          i1_2,
          i1_2_mix,
          i1_3,
          i1_3_mix,
          i1_4,
          se = list(calc.ses.cluster(i1_1, "market", 
                                     data = vendor_end_d2_main),
                    calc.ses.cluster(i1_1_mix, "market", 
                                     data = vendor_end_d2_mixed),
                    calc.ses.cluster(i1_2, "market", 
                                     data = vendor_end_d5_main),
                    calc.ses.cluster(i1_2_mix, "market", 
                                     data = vendor_end_d5_mixed),
                    calc.ses.cluster(i1_3, "market", 
                                     data = vendor_end_d10_main),
                    calc.ses.cluster(i1_3_mix, "market", 
                                     data = vendor_end_d10_mixed),
                    calc.ses.cluster(i1_4, "market", 
                                     data = vendor_end_ind)),
          keep = c("BU", "TD", "Both"),
          title = "Spillover Analyses, Self-Reported Compliance",
          model.numbers = F,
          notes = c("Individual-level models include enumerator", 
                    "and block fixed-effects.",
                    "Individual-level models have SEs clustered",
                    "on market."),
          keep.stat = c("n", "adj.rsq"),
          notes.label = "Notes:",
          dep.var.labels = c("Self-Rep. Comp."),
          star.cutoffs = c(.05, .01, .001),
          #no.space = T,
          label = "tab:spillover_O2",
          header = F,
          font.size = "footnotesize",
          column.labels = c("D2", "D2 - Mixed",
                            "D5", "D5 - Mixed", "D10", "D10 - Mixed",
                            "Ind. Only"),
          out = paste0(out_path, "tableI1.tex"))

### Table I2 ---

#### Models ----
i2_1 <- lm(fee2_always ~ BU + TD + Both + block_id + enum,
                         data = vendor_end_d2_main,
                         weights = 1/prob_d2)
i2_2 <- lm(fee2_always ~ BU + TD + Both + block_id + enum,
                         data = vendor_end_d5_main,
                         weights = 1/prob_d5) 
i2_3 <- lm(fee2_always ~ BU + TD + Both + block_id + enum,
                          data = vendor_end_d10_main,
                          weights = 1/prob_d10)

i2_1_mix <- lm(fee2_always ~ BU + TD + Both + block_id + enum,
                             data = vendor_end_d2_mixed,
                             weights = 1/prob_d2_mix) 
i2_2_mix <- lm(fee2_always ~ BU + TD + Both + block_id + enum,
                             data = vendor_end_d5_mixed,
                             weights = 1/prob_d5_mix) 
i2_3_mix <- lm(fee2_always ~ BU + TD + Both + block_id + enum,
                              data = vendor_end_d10_mixed,
                              weights = 1/prob_d10_mix) 

i2_4 <- lm(fee2_always ~ BU + TD + Both + block_id + enum,
                           data = vendor_end_ind,
                           weights = 1/prob_d1_ind) 


#### Tex Table ----
stargazer(i2_1,
          i2_1_mix,
          i2_2,
          i2_2_mix,
          i2_3,
          i2_3_mix,
          i2_4,
          se = list(calc.ses.cluster(i2_1, "market", 
                                     data = vendor_end_d2_main),
                    calc.ses.cluster(i2_1_mix, "market", 
                                     data = vendor_end_d2_mixed),
                    calc.ses.cluster(i2_2, "market", 
                                     data = vendor_end_d5_main),
                    calc.ses.cluster(i2_2_mix, "market", 
                                     data = vendor_end_d5_mixed),
                    calc.ses.cluster(i2_3, "market", 
                                     data = vendor_end_d10_main),
                    calc.ses.cluster(i2_3_mix, "market", 
                                     data = vendor_end_d10_mixed),
                    calc.ses.cluster(i2_4, "market", 
                                     data = vendor_end_ind)),
          keep = c("BU", "TD", "Both"),
          title = "Spillover Analyses, Group-Perceived Compliance",
          model.numbers = F,
          notes = c("Individual-level models include enumerator", 
                    "and block fixed-effects.",
                    "Individual-level models have SEs clustered",
                    "on market."),
          keep.stat = c("n", "adj.rsq"),
          notes.label = "Notes:",
          dep.var.labels = c("Group-Per. Comp."),
          star.cutoffs = c(.05, .01, .001),
          #no.space = T,
          label = "tab:spillover_O2",
          header = F,
          font.size = "footnotesize",
          column.labels = c("D2", "D2 - Mixed",
                            "D5", "D5 - Mixed", "D10", "D10 - Mixed",
                            "Ind. Only"),
          out = paste0(out_path, "tableI2.tex"))

### Table I3 ----

#### Models ----
i3_1 <- lm(recent_receipt_7 ~ BU + TD + Both + block_id + enum,
                      data = vendor_end_d2_main,
                      weights = 1/prob_d2)
i3_2 <- lm(recent_receipt_7 ~ BU + TD + Both + block_id + enum,
                      data = vendor_end_d5_main,
                      weights = 1/prob_d5) 
i3_3 <- lm(recent_receipt_7 ~ BU + TD + Both + block_id + enum,
                       data = vendor_end_d10_main,
                       weights = 1/prob_d10) 

i3_1_mix <- lm(recent_receipt_7 ~ BU + TD + Both + block_id + enum,
                          data = vendor_end_d2_mixed,
                          weights = 1/prob_d2_mix) 
i3_2_mix <- lm(recent_receipt_7 ~ BU + TD + Both + block_id + enum,
                          data = vendor_end_d5_mixed,
                          weights = 1/prob_d5_mix)
i3_3_mix <- lm(recent_receipt_7 ~ BU + TD + Both + block_id + enum,
                           data = vendor_end_d10_mixed,
                           weights = 1/prob_d10_mix) 

i3_4 <- lm(recent_receipt_7 ~ BU + TD + Both + block_id + enum,
                        data = vendor_end_ind,
                        weights = 1/prob_d1_ind) 

#### Tex Table ----
stargazer(i3_1,
          i3_1_mix,
          i3_2,
          i3_2_mix,
          i3_3,
          i3_3_mix,
          i3_4,
          se = list(calc.ses.cluster(i3_1, "market", 
                                     data = vendor_end_d2_main),
                    calc.ses.cluster(i3_1_mix, "market", 
                                     data = vendor_end_d2_mixed),
                    calc.ses.cluster(i3_2, "market", 
                                     data = vendor_end_d5_main),
                    calc.ses.cluster(i3_2_mix, "market", 
                                     data = vendor_end_d5_mixed),
                    calc.ses.cluster(i3_3, "market", 
                                     data = vendor_end_d10_main),
                    calc.ses.cluster(i3_3_mix, "market", 
                                     data = vendor_end_d10_mixed),
                    calc.ses.cluster(i3_4, "market", 
                                     data = vendor_end_ind)),
          keep = c("BU", "TD", "Both"),
          title = "Spillover Analyses, Evidence of Recent Receipt",
          model.numbers = F,
          notes = c("Individual-level models include enumerator", 
                    "and block fixed-effects.",
                    "Individual-level models have SEs clustered",
                    "on market."),
          keep.stat = c("n", "adj.rsq"),
          notes.label = "Notes:",
          dep.var.labels = c("Evidence of Recent Receipt"),
          star.cutoffs = c(.05, .01, .001),
          #no.space = T,
          header = F,
          label = "tab:spillover_O3",
          font.size = "footnotesize",
          column.labels = c("D2", "D2 - Mixed",
                            "D5", "D5 - Mixed", "D10", "D10 - Mixed",
                            "Ind. Only"),
          out = paste0(out_path, "tableI3.tex"))

## Appendix I.3: Treatment Externalities Approach ----

### Table I4 ----

#### Models ----
h1_o1full_sM_daily <- lm(fee1_full ~ BU + TD + Both +
                           n_2_BU + n_5_BU + n_10_BU + 
                           n_2_TD + n_5_TD + n_10_TD +
                           n_2_Both + n_5_Both + n_10_Both +
                           n_2_all + n_5_all + n_10_all +
                           block_id + enum,
                         data = vendor_end_spill_daily)
h1_o1full_sM_max <- lm(fee1_full ~ BU + TD + Both +
                         n_2_BU + n_5_BU + n_10_BU + 
                         n_2_TD + n_5_TD + n_10_TD +
                         n_2_Both + n_5_Both + n_10_Both +
                         n_2_all + n_5_all + n_10_all +
                         block_id + enum,
                       data = vendor_end_spill_max)
h1_o1full_sM_num <- lm(fee1_full ~ BU + TD + Both +
                         n_2_BU + n_5_BU + n_10_BU + 
                         n_2_TD + n_5_TD + n_10_TD +
                         n_2_Both + n_5_Both + n_10_Both +
                         n_2_all + n_5_all + n_10_all +
                         block_id + enum,
                       data = vendor_end_spill_num)

#### Tex Table ----
stargazer(h1_o1full_sM_daily, 
          h1_o1full_sM_max,
          h1_o1full_sM_num,
          se = list(calc.ses.cluster(h1_o1full_sM_daily, "market", 
                                     data = vendor_end_spill_daily),
                    calc.ses.cluster(h1_o1full_sM_max, "market", 
                                     data = vendor_end_spill_max),
                    calc.ses.cluster(h1_o1full_sM_num, "market", 
                                     data = vendor_end_spill_num)),
          omit = c("Constant", 
                   "enum",
                   "block"),
          title = "Treatment Externalities, Self-Reported Tax Compliance",
          model.numbers = F,
          notes = c("Individual-level models include enumerator", 
                    "and block fixed-effects.",
                    "Individual-level models have SEs clustered",
                    "on market."),
          keep.stat = c("n", "adj.rsq"),
          notes.label = "Notes:",
          dep.var.labels = c("Self-Rep. Compliance"),
          star.cutoffs = c(.05, .01, .001),
          #no.space = T,
          single.row = T,
          header = F,
          label = "tab:treatex_O1",
          font.size = "footnotesize",
          column.labels = c("Avg. Vend. pr. Day",
                            "Max Num. Vendors",
                            "Num. Mkts."),
          covariate.labels = c(NA,
                               NA,
                               "BOTH",
                               "N-2-BU",
                               "N-5-BU",
                               "N-10-BU",
                               "N-2-TD",
                               "N-5-TD",
                               "N-10-TD",
                               "N-2-BOTH",
                               "N-5-BOTH",
                               "N-10-BOTH",
                               "N-2-All",
                               "N-5-All",
                               "N-10-All"),
          out = paste0(out_path, "tableI4.tex"))

### Table I5 ----

#### Models ----
h1_o2always_sM_daily <- lm(fee2_always ~ BU + TD + Both +
                             n_2_BU + n_5_BU + n_10_BU + 
                             n_2_TD + n_5_TD + n_10_TD +
                             n_2_Both + n_5_Both + n_10_Both +
                             n_2_all + n_5_all + n_10_all +
                             block_id + enum,
                           data = vendor_end_spill_daily)
h1_o2always_sM_max <- lm(fee2_always ~ BU + TD + Both +
                           n_2_BU + n_5_BU + n_10_BU + 
                           n_2_TD + n_5_TD + n_10_TD +
                           n_2_Both + n_5_Both + n_10_Both +
                           n_2_all + n_5_all + n_10_all +
                           block_id + enum,
                         data = vendor_end_spill_max)
h1_o2always_sM_num <- lm(fee2_always ~ BU + TD + Both +
                           n_2_BU + n_5_BU + n_10_BU + 
                           n_2_TD + n_5_TD + n_10_TD +
                           n_2_Both + n_5_Both + n_10_Both +
                           n_2_all + n_5_all + n_10_all +
                           block_id + enum,
                         data = vendor_end_spill_num)

#### Tex Table ----
stargazer(h1_o2always_sM_daily, 
          h1_o2always_sM_max,
          h1_o2always_sM_num,
          se = list(calc.ses.cluster(h1_o2always_sM_daily, "market", 
                                     data = vendor_end_spill_daily),
                    calc.ses.cluster(h1_o2always_sM_max, "market", 
                                     data = vendor_end_spill_max),
                    calc.ses.cluster(h1_o2always_sM_num, "market", 
                                     data = vendor_end_spill_num)),
          omit = c("Constant", 
                   "enum",
                   "block"),
          title = "Treatment Externalities, Group-Perceived Tax Compliance",
          model.numbers = F,
          notes = c("Individual-level models include enumerator", 
                    "and block fixed-effects.",
                    "Individual-level models have SEs clustered",
                    "on market."),
          keep.stat = c("n", "adj.rsq"),
          notes.label = "Notes:",
          dep.var.labels = c("Group-Per. Compliance"),
          star.cutoffs = c(.05, .01, .001),
          #no.space = T,
          single.row = T,
          header = F,
          label = "tab:treatex_O2",
          font.size = "footnotesize",
          column.labels = c("Avg. Vend. pr. Day",
                            "Max Num. Vendors",
                            "Num. Mkts."),
          covariate.labels = c(NA,
                               NA,
                               "BOTH",
                               "N-2-BU",
                               "N-5-BU",
                               "N-10-BU",
                               "N-2-TD",
                               "N-5-TD",
                               "N-10-TD",
                               "N-2-BOTH",
                               "N-5-BOTH",
                               "N-10-BOTH",
                               "N-2-All",
                               "N-5-All",
                               "N-10-All"),
          out = paste0(out_path, "tableI5.tex"))

### Table I6 ----

#### Models ----
h1_o3_r7_sM_daily <- lm(recent_receipt_7 ~ BU + TD + Both +
                          n_2_BU + n_5_BU + n_10_BU + 
                          n_2_TD + n_5_TD + n_10_TD +
                          n_2_Both + n_5_Both + n_10_Both +
                          n_2_all + n_5_all + n_10_all +
                          block_id + enum,
                        data = vendor_end_spill_daily)
h1_o3_r7_sM_max <- lm(recent_receipt_7 ~ BU + TD + Both +
                        n_2_BU + n_5_BU + n_10_BU + 
                        n_2_TD + n_5_TD + n_10_TD +
                        n_2_Both + n_5_Both + n_10_Both +
                        n_2_all + n_5_all + n_10_all +
                        block_id + enum,
                      data = vendor_end_spill_max)
h1_o3_r7_sM_num <- lm(recent_receipt_7 ~ BU + TD + Both +
                        n_2_BU + n_5_BU + n_10_BU + 
                        n_2_TD + n_5_TD + n_10_TD +
                        n_2_Both + n_5_Both + n_10_Both +
                        n_2_all + n_5_all + n_10_all +
                        block_id + enum,
                      data = vendor_end_spill_num)

#### Tex Table ----
stargazer(h1_o3_r7_sM_daily, 
          h1_o3_r7_sM_max,
          h1_o3_r7_sM_num,
          se = list(calc.ses.cluster(h1_o3_r7_sM_daily, "market", 
                                     data = vendor_end_spill_daily),
                    calc.ses.cluster(h1_o3_r7_sM_max, "market", 
                                     data = vendor_end_spill_max),
                    calc.ses.cluster(h1_o3_r7_sM_num, "market", 
                                     data = vendor_end_spill_num)),
          omit = c("Constant", 
                   "enum",
                   "block"),
          title = "Treatment Externalities, Evidence of Recent Receipt",
          model.numbers = F,
          notes = c("Individual-level models include enumerator", 
                    "and block fixed-effects.",
                    "Individual-level models have SEs clustered",
                    "on market."),
          keep.stat = c("n", "adj.rsq"),
          notes.label = "Notes:",
          dep.var.labels = c("Evidence of Recent Receipt"),
          star.cutoffs = c(.05, .01, .001),
          #no.space = T,
          single.row = T,
          header = F,
          label = "tab:treatex_O2",
          font.size = "footnotesize",
          column.labels = c("Avg. Vend. pr. Day",
                            "Max Num. Vendors",
                            "Num. Mkts."),
          covariate.labels = c(NA,
                               NA,
                               "BOTH",
                               "N-2-BU",
                               "N-5-BU",
                               "N-10-BU",
                               "N-2-TD",
                               "N-5-TD",
                               "N-10-TD",
                               "N-2-BOTH",
                               "N-5-BOTH",
                               "N-10-BOTH",
                               "N-2-All",
                               "N-5-All",
                               "N-10-All"),
          out = paste0(out_path, "tableI6.tex"))


# Appendix J: Compliance Analysis ----------------------------------------------


## Compliance Intermediate Objects
# merging in compliance data at Vendor
vendor_end_compl <- full_join(vendor_end, 
                        market_only_data, 
                        by = c("market" = "Market", 
                               "district" = "District",
                               "treatment_status" = "treatment_status"))
# and market level
market_lvl_compl <- full_join(market_lvl, 
                        market_only_data, 
                        by = c("market" = "Market", 
                               "District" = "District"))

# filtering to Endline only for DIM models
market_lvl_compl_el <- market_lvl_compl[market_lvl_compl$Endline == 1, ]


## Table J1 ----

### Models ----
j1_1s <- ivreg(fee1_full ~ BU_rcvd_strict +
                                        TD_rcvd_strict +
                                        BOTH_rcvd_strict + 
                                        block_id + 
                                        as.factor(enum) |
                                        BU + TD + Both +
                                        block_id + as.factor(enum),
                                      data = vendor_end_compl,
                                      x = TRUE)
j1_1r <- ivreg(fee1_full ~ BU_rcvd_relaxed +
                                         TD_rcvd_relaxed +
                                         BOTH_rcvd_relaxed + 
                                         block_id + 
                                         as.factor(enum) |
                                         BU + TD + Both +
                                         block_id + as.factor(enum),
                                       data = vendor_end_compl,
                                       x = TRUE)
j1_2s <- ivreg(fee2_always ~ BU_rcvd_strict +
                                          TD_rcvd_strict +
                                          BOTH_rcvd_strict + 
                                          block_id + 
                                          as.factor(enum) |
                                          BU + TD + Both +
                                          block_id + as.factor(enum),
                                        data = vendor_end_compl,
                                        x = TRUE)
j1_2r <- ivreg(fee2_always ~ BU_rcvd_relaxed +
                                           TD_rcvd_relaxed +
                                           BOTH_rcvd_relaxed + 
                                           block_id + 
                                           as.factor(enum) |
                                           BU + TD + Both +
                                           block_id + as.factor(enum),
                                         data = vendor_end_compl,
                                         x = TRUE)
j1_3s <- ivreg(recent_receipt_7 ~ BU_rcvd_strict +
                                       TD_rcvd_strict +
                                       BOTH_rcvd_strict + 
                                       block_id + 
                                       as.factor(enum) |
                                       BU + TD + Both +
                                       block_id + as.factor(enum),
                                     data = vendor_end_compl,
                                     x = TRUE)
j1_3r <- ivreg(recent_receipt_7 ~ BU_rcvd_relaxed +
                                        TD_rcvd_relaxed +
                                        BOTH_rcvd_relaxed+ 
                                        block_id + 
                                        as.factor(enum) |
                                        BU + TD + Both +
                                        block_id + as.factor(enum),
                                      data = vendor_end_compl,
                                      x = TRUE)

### Tex Table ----
# clustered standard errors
TrGr_se <- list(calc.ses.cluster(j1_1s,
                                 "market",
                                 data = vendor_end_compl),
                calc.ses.cluster(j1_1r,
                                 "market",
                                 data = vendor_end_compl),
                calc.ses.cluster(j1_2s, 
                                 "market",
                                 data = vendor_end_compl),
                calc.ses.cluster(j1_2r,
                                 "market",
                                 data = vendor_end_compl),
                calc.ses.cluster(j1_3s, 
                                 "market",
                                 data = vendor_end_compl),
                calc.ses.cluster(j1_3r,
                                 "market",
                                 data = vendor_end_compl))

stargazer(j1_1s,
          j1_1r,
          j1_2s,
          j1_2r,
          j1_3s,
          j1_3r,
          se = TrGr_se,
          keep = c("BU", "TD", "BOTH"),
          title = "Compliance IV Regression 2nd-Stage\nTreatment Group Approach",
          model.numbers = F,
          notes = c("Individual-level models include enumerator", 
                    "and block fixed-effects.",
                    "Individual-level models have SEs clustered",
                    "on market."),
          keep.stat = c("n", "adj.rsq"),
          notes.label = "Notes:",
          dep.var.labels = c("Self-Rep. Compl", "Per. Group Compl.", "Recent Rcpt."),
          star.cutoffs = c(.05, .01, .001),
          #no.space = T,
          label = "app:tab:compliance_trgr",
          header = F,
          font.size = "footnotesize",
          column.labels = c(rep(c("Strict", "Relaxed"), 3)),
          covariate.labels = c("BU - Str.",
                               "TD - Str.",
                               "BOTH - Str.",
                               "BU - Rel.",
                               "TD - Rel.",
                               "BOTH - Rel."),
          out = paste0(out_path, "tableJ1.tex"))


## Table J2 ----

### Models ----
j2_1s <- ivreg(fee1_full ~ BU_rcvd_strict +
                                            TD_rcvd_strict +
                                            BOTH_rcvd_strict + 
                                            block_id |
                                            BU + TD + Both +
                                            block_id ,
                                          data = market_lvl_compl_el,
                                          x = TRUE)
j2_1r <- ivreg(fee1_full ~ BU_rcvd_relaxed +
                                             TD_rcvd_relaxed +
                                             BOTH_rcvd_relaxed + 
                                             block_id |
                                             BU + TD + Both +
                                             block_id ,
                                           data = market_lvl_compl_el,
                                           x = TRUE)
j2_2s <- ivreg(fee2_always ~ BU_rcvd_strict +
                                              TD_rcvd_strict +
                                              BOTH_rcvd_strict + 
                                              block_id |
                                              BU + TD + Both +
                                              block_id ,
                                            data = market_lvl_compl_el,
                                            x = TRUE)
j2_2r <- ivreg(fee2_always ~ BU_rcvd_relaxed +
                                               TD_rcvd_relaxed +
                                               BOTH_rcvd_relaxed + 
                                               block_id |
                                               BU + TD + Both +
                                               block_id ,
                                             data = market_lvl_compl_el,
                                             x = TRUE)
j2_3s <- ivreg(recent_receipt_7 ~ BU_rcvd_strict +
                                           TD_rcvd_strict +
                                           BOTH_rcvd_strict + 
                                           block_id |
                                           BU + TD + Both +
                                           block_id ,
                                         data = market_lvl_compl_el,
                                         x = TRUE)
j2_3r <- ivreg(recent_receipt_7 ~ BU_rcvd_relaxed +
                                            TD_rcvd_relaxed +
                                            BOTH_rcvd_relaxed+ 
                                            block_id |
                                            BU + TD + Both +
                                            block_id ,
                                          data = market_lvl_compl_el,
                                          x = TRUE)


### Tex Table ----
stargazer(j2_1s,
          j2_1r,
          j2_2s,
          j2_2r,
          j2_3s,
          j2_3r,
          keep = c("BU", "TD", "BOTH"),
          title = "Compliance IV Regression 2-Stage\nTreatment Group Approach\nMarket Level DIM",
          model.numbers = F,
          notes = c("Models include block fixed effects."),
          keep.stat = c("n", "adj.rsq"),
          notes.label = "Notes:",
          dep.var.labels = c("Self-Rep. Compl", "Per. Group Compl.", "Recent Rcpt."),
          star.cutoffs = c(.05, .01, .001),
          #no.space = T,
          label = "app:tab:compliance_trgr_mkt_dim",
          header = F,
          font.size = "footnotesize",
          column.labels = c(rep(c("Strict", "Relaxed"), 3)),
          covariate.labels = c("BU - Str.",
                               "TD - Str.",
                               "BOTH - Str.",
                               "BU - Rel.",
                               "TD - Rel.",
                               "BOTH - Rel."),
          out = paste0(out_path, "tableJ2.tex"))


## Table J3 ----

### Models ----
j3_1s <- ivreg(
  fee1_full ~ Endline*(BU_rcvd_strict +
                         TD_rcvd_strict +
                         BOTH_rcvd_strict) + 
    block_id |
    Endline*(BU + TD + Both) +
    block_id ,
  data = market_lvl_compl,
  x = TRUE)
j3_1r <- ivreg(
  fee1_full ~ Endline*(BU_rcvd_relaxed +
                         TD_rcvd_relaxed +
                         BOTH_rcvd_relaxed) + 
    block_id |
    Endline*(BU + TD + Both) +
    block_id ,
  data = market_lvl_compl,
  x = TRUE)
j3_2s <- ivreg(
  fee2_always ~ Endline*(BU_rcvd_strict +
                           TD_rcvd_strict +
                           BOTH_rcvd_strict) + 
    block_id |
    Endline*(BU + TD + Both) +
    block_id ,
  data = market_lvl_compl,
  x = TRUE)
j3_2r <- ivreg(
  fee2_always ~ Endline*(BU_rcvd_relaxed +
                           TD_rcvd_relaxed +
                           BOTH_rcvd_relaxed) + 
    block_id |
    Endline*(BU + TD + Both) +
    block_id ,
  data = market_lvl_compl,
  x = TRUE)
j3_3s <- ivreg(
  recent_receipt_7 ~ Endline*(BU_rcvd_strict +
                                TD_rcvd_strict +
                                BOTH_rcvd_strict) + 
    block_id |
    Endline*(BU + TD + Both) +
    block_id ,
  data = market_lvl_compl,
  x = TRUE)
j3_3r <- ivreg(
  recent_receipt_7 ~ Endline*(BU_rcvd_relaxed +
                                TD_rcvd_relaxed +
                                BOTH_rcvd_relaxed) + 
    block_id |
    Endline*(BU + TD + Both) +
    block_id ,
  data = market_lvl_compl,
  x = TRUE)

### Tex Table ----
stargazer(j3_1s,
          j3_1r,
          j3_2s,
          j3_2r,
          j3_3s,
          j3_3r,
          keep = c("Endline:"),
          title = "Compliance IV Regression 2nd-Stage\nTreatment Group Approach\nMarket Level DID",
          model.numbers = F,
          notes = c("Models include block fixed effects."),
          keep.stat = c("n", "adj.rsq"),
          notes.label = "Notes:",
          dep.var.labels = c("Self-Rep. Compl", "Per. Group Compl.", "Recent Rcpt."),
          star.cutoffs = c(.05, .01, .001),
          #no.space = T,
          label = "app:tab:compliance_trgr_mkt_did",
          header = F,
          font.size = "footnotesize",
          column.labels = c(rep(c("Strict", "Relaxed"), 3)),
          covariate.labels = c("BU - Str.",
                               "TD - Str.",
                               "BOTH - Str.",
                               "BU - Rel.",
                               "TD - Rel.",
                               "BOTH - Rel."),
          out = paste0(out_path, "tableJ3.tex"))

# Appendix K: Multilevel Modeling Analysis -------------------------------------

## Models ----
h1_o1full_mlm1  <- lmer(fee1_full ~ BU + TD + Both + 
                          (1|district/block_id/market) + (1|enum),
                        data = vendor_end)
h1_o2always_mlm1  <- lmer(fee2_always ~ BU + TD + Both + 
                            (1|district/block_id/market) + (1|enum),
                          data = vendor_end)
h1_o3_r7_mlm1  <- lmer(recent_receipt_7 ~ BU + TD + Both + 
                         (1|district/block_id/market) + (1|enum),
                       data = vendor_end)

## Tex Table ----
stargazer(h1_o1full_mlm1,
          h1_o2always_mlm1,
          h1_o3_r7_mlm1,
          keep = c("BU", "TD", "Both"),
          title = "Multilevel Models",
          model.numbers = F,
          notes = c("Models include random intercepts by enumerators and", 
                    "by markets nested in blocks nested in districts."),
          keep.stat = c("n", "adj.rsq"),
          notes.label = "Notes:",
          dep.var.labels = c("Self-Rep. Compl", "Per. Group Compl.", "Recent Rcpt."),
          star.cutoffs = c(.05, .01, .001),
          label = "app:tab:mlm",
          header = F,
          font.size = "footnotesize",
          out = paste0(out_path, "tableK1.tex"))

# Appendix L: Heterogeneous Treatment Effects Analysis -------------------------

## Appendix L.1: By Vendor Covariates ----

### Table L1: Gender ----

#### Models ----

# Self-Reported Compliance
#male vendors
l11m <- lm(data = vendor_end[vendor_end$female == 0,], 
                             fee1_full ~ BU + TD + Both +
                               block_id +
                               as.factor(enum))
#female vendors
l11f <- lm(data = vendor_end[vendor_end$female == 1,], 
                               fee1_full ~ BU + TD + Both +
                                 block_id +
                                 as.factor(enum))
# interaction
l11g <- lm(data = vendor_end, 
                               fee1_full ~ female*(BU + TD + Both) +
                                 block_id +
                                 as.factor(enum))
# Perceived Group Compliance
#male vendors
l12m <- lm(data = vendor_end[vendor_end$female == 0,], 
                               fee2_always ~ BU + TD + Both +
                                 block_id +
                                 as.factor(enum))
#female vendors
l12f <- lm(data = vendor_end[vendor_end$female == 1,], 
                                 fee2_always ~ BU + TD + Both +
                                   block_id +
                                   as.factor(enum))
l12g <- lm(data = vendor_end, 
                                 fee2_always ~ female*(BU + TD + Both) +
                                   block_id +
                                   as.factor(enum))
# Evidence of Recent Receipt
#male vendors
l13m <- lm(data = vendor_end[vendor_end$female == 0,], 
                            recent_receipt_7 ~ BU + TD + Both +
                              block_id +
                              as.factor(enum))
#female vendors
l13f <- lm(data = vendor_end[vendor_end$female == 1,], 
                              recent_receipt_7 ~ BU + TD + Both +
                                block_id +
                                as.factor(enum))
l13g <- lm(data = vendor_end, 
                              recent_receipt_7 ~ female*(BU + TD + Both) +
                                block_id +
                                as.factor(enum))

#### Tex Table ----
stargazer(l11m,
          l11f,
          l11g,
          l12m,
          l12f,
          l12g,
          l13m,
          l13f,
          l13g,
          se = list(calc.ses.cluster(l11m, "market", 
                                     data = vendor_end[vendor_end$female == 0,],),
                    calc.ses.cluster(l11f, "market", 
                                     data = vendor_end[vendor_end$female == 1,]),
                    calc.ses.cluster(l11g, "market", 
                                     data = vendor_end),
                    calc.ses.cluster(l12m, "market", 
                                     data = vendor_end[vendor_end$female == 0,]),
                    calc.ses.cluster(l12f, "market", 
                                     data = vendor_end[vendor_end$female == 1,]),
                    calc.ses.cluster(l12g, "market", 
                                     data = vendor_end),
                    calc.ses.cluster(l13m, "market", 
                                     data = vendor_end[vendor_end$female == 0,]),
                    calc.ses.cluster(l13f, "market", 
                                     data = vendor_end[vendor_end$female == 1,]),
                    calc.ses.cluster(l13g, "market", 
                                     data = vendor_end)),
          font.size = "scriptsize",
          title = "Subgroup Analysis by Gender",
          column.labels = rep(c("Male", "Female", "Int."), 3),
          header = FALSE,
          omit = c("block*", "as.factor*", "Constant"),
          star.cutoffs = c(.05, .01, .001),
          table.placement = "!htbp",
          no.space = TRUE,
          keep.stat = c("adj.rsq", "n"),
          dep.var.caption = '',
          label = "app:tab:het_te:gender",
          dep.var.labels = c("\\makecell{Self-Reported\\\\Compliance}",
                             "\\makecell{Perceived Group\\\\Compliance}",
                             "\\makecell{Evidence of\\\\Recent Receipt}"),
          covariate.labels = c("Female", "BU", "TD", "BOTH", 
                               "Female * BU", "Female * TD", 
                               "Female * BOTH"),
          model.numbers = FALSE,
          out = paste0(out_path, "tableL1.tex"))


### Table L2: Services vs Goods ----

#### Models ----

## Self-Reported Compliance
#vendors who sell primarily goods
l21g <- lm(data = vendor_end[vendor_end$service == 0,], 
                             fee1_full ~ BU + TD + Both +
                               block_id +
                               as.factor(enum))
#vendors who sell primarily services
l21s <- lm(data = vendor_end[vendor_end$service == 1,], 
                                fee1_full ~ BU + TD + Both +
                                  block_id +
                                  as.factor(enum))
# interaction
l21int <- lm(data = vendor_end, 
                                    fee1_full ~ service*(BU + TD + Both) +
                                      block_id +
                                      as.factor(enum))

## Group-Perceived Tax Compliance
#vendors who sell primarily goods
l22g <- lm(data = vendor_end[vendor_end$service == 0,], 
                               fee2_always ~ BU + TD + Both +
                                 block_id +
                                 as.factor(enum))
#vendors who sell primarily services
l22s <- lm(data = vendor_end[vendor_end$service == 1,], 
                                  fee2_always ~ BU + TD + Both +
                                    block_id +
                                    as.factor(enum))
# interaction
l22int <- lm(data = vendor_end, 
                                      fee2_always ~ service*(BU + TD + Both) +
                                        block_id +
                                        as.factor(enum))

## Evidence of Recent Receipt
#vendors who sell primarily goods
l23g <- lm(data = vendor_end[vendor_end$service == 0,], 
                            recent_receipt_7 ~ BU + TD + Both +
                              block_id +
                              as.factor(enum))
#vendors who sell primarily services
l23s <- lm(data = vendor_end[vendor_end$service == 1,], 
                               recent_receipt_7 ~ BU + TD + Both +
                                 block_id +
                                 as.factor(enum))
l23int <- lm(data = vendor_end, 
                                   recent_receipt_7 ~ service*(BU + TD + Both) +
                                     block_id +
                                     as.factor(enum))

#### Tex Table ----
stargazer(l21g,
          l21s,
          l21int,
          l22g,
          l22s,
          l22int,
          l23g,
          l23s,
          l23int,
          se = list(calc.ses.cluster(l21g, "market", 
                                     data = vendor_end[vendor_end$service == 0,],),
                    calc.ses.cluster(l21s, "market", 
                                     data = vendor_end[vendor_end$service == 1,]),
                    calc.ses.cluster(l21int, "market", 
                                     data = vendor_end),
                    calc.ses.cluster(l22g, "market", 
                                     data = vendor_end[vendor_end$service == 0,]),
                    calc.ses.cluster(l22s, "market", 
                                     data = vendor_end[vendor_end$service == 1,]),
                    calc.ses.cluster(l22int, "market", 
                                     data = vendor_end),
                    calc.ses.cluster(l23g, "market", 
                                     data = vendor_end[vendor_end$service == 0,]),
                    calc.ses.cluster(l23s, "market", 
                                     data = vendor_end[vendor_end$service == 1,]),
                    calc.ses.cluster(l23int, "market", 
                                     data = vendor_end)),
          font.size = "scriptsize",
          title = "Subgroup Analysis by Services vs Goods",
          column.labels = rep(c("Goods", "Services", "Int."), 3),
          header = FALSE,
          omit = c("block*", "as.factor*", "Constant"),
          star.cutoffs = c(.05, .01, .001),
          table.placement = "!htbp",
          no.space = TRUE,
          keep.stat = c("adj.rsq", "n"),
          dep.var.caption = '',
          label = "app:tab:het_te:service",
          dep.var.labels = c("\\makecell{Self-Reported\\\\Compliance}",
                             "\\makecell{Perceived Group\\\\Compliance}",
                             "\\makecell{Evidence of\\\\Recent Receipt}"),
          covariate.labels = c("Service", "BU", "TD", "BOTH", 
                               "Service * BU", "Service * TD", 
                               "Service * BOTH"),
          model.numbers = FALSE,
          out = paste0(out_path, "tableL2.tex"))

### Table L3: Selling Daily or Not ----

#### Models ----
# Self-Reported Tax Compliance
#vendors who do not sell daily
l31n <- lm(data = vendor_end[vendor_end$sell_daily == 0,], 
                                  fee1_full ~ BU + TD + Both +
                                    block_id +
                                    as.factor(enum))
#vendors who sell daily
l31s <- lm(data = vendor_end[vendor_end$sell_daily == 1,], 
                                   fee1_full ~ BU + TD + Both +
                                     block_id +
                                     as.factor(enum))
# interaction
l31int <- lm(data = vendor_end, 
                                       fee1_full ~ sell_daily*(BU + TD + Both) +
                                         block_id +
                                         as.factor(enum))

# Perceived Group Compliance
#vendors who do not sell daily
l32n <- lm(data = vendor_end[vendor_end$sell_daily == 0,], 
                                    fee2_always ~ BU + TD + Both +
                                      block_id +
                                      as.factor(enum))
#vendors who sell daily
l32s <- lm(data = vendor_end[vendor_end$sell_daily == 1,], 
                                     fee2_always ~ BU + TD + Both +
                                       block_id +
                                       as.factor(enum))
# interaction
l32int <- lm(data = vendor_end, 
                                         fee2_always ~ sell_daily*(BU + TD + Both) +
                                           block_id +
                                           as.factor(enum))

# Evidence of Recent Receipt
#vendors who do not sell daily
l33n <- lm(data = vendor_end[vendor_end$sell_daily == 0,], 
                                 recent_receipt_7 ~ BU + TD + Both +
                                   block_id +
                                   as.factor(enum))
#vendors who sell daily
l33s <- lm(data = vendor_end[vendor_end$sell_daily == 1,], 
                                  recent_receipt_7 ~ BU + TD + Both +
                                    block_id +
                                    as.factor(enum))
# interaction
l33int <- lm(data = vendor_end, 
                                      recent_receipt_7 ~ sell_daily*(BU + TD + Both) +
                                        block_id +
                                        as.factor(enum))


#### Tex Table ----
stargazer(l31n,
          l31s,
          l31int,
          l32n,
          l32s,
          l32int,
          l33n,
          l33s,
          l33int,
          se = list(calc.ses.cluster(l31n, "market", 
                                     data = vendor_end[vendor_end$sell_daily == 0,],),
                    calc.ses.cluster(l31s, "market", 
                                     data = vendor_end[vendor_end$sell_daily == 1,]),
                    calc.ses.cluster(l31int, "market", 
                                     data = vendor_end),
                    calc.ses.cluster(l32n, "market", 
                                     data = vendor_end[vendor_end$sell_daily == 0,]),
                    calc.ses.cluster(l32s, "market", 
                                     data = vendor_end[vendor_end$sell_daily == 1,]),
                    calc.ses.cluster(l32int, "market", 
                                     data = vendor_end),
                    calc.ses.cluster(l33n, "market", 
                                     data = vendor_end[vendor_end$sell_daily == 0,]),
                    calc.ses.cluster(l33s, "market", 
                                     data = vendor_end[vendor_end$sell_daily == 1,]),
                    calc.ses.cluster(l33int, "market", 
                                     data = vendor_end)),
          font.size = "scriptsize",
          title = "Subgroup Analysis by Selling Daily or Not",
          column.labels = rep(c("Not Daily", "Daily", "Int."), 3),
          header = FALSE,
          omit = c("block*", "as.factor*", "Constant"),
          star.cutoffs = c(.05, .01, .001),
          table.placement = "!htbp",
          no.space = TRUE,
          keep.stat = c("adj.rsq", "n"),
          dep.var.caption = '',
          label = "app:tab:het_te:sell_daily",
          dep.var.labels = c("\\makecell{Self-Reported\\\\Compliance}",
                             "\\makecell{Perceived Group\\\\Compliance}",
                             "\\makecell{Evidence of\\\\Recent Receipt}"),
          covariate.labels = c("Sell Daily", "BU", "TD", "BOTH", 
                               "Sell Daily * BU",
                               "Sell Daily * TD", 
                               "Sell Daily * BOTH"),
          model.numbers = FALSE,
          out = paste0(out_path, "tableL3.tex"))

### Table L4: Wealth Heterogeneous Effects Analysis ----

#### Models ----
l4_1_hhincome_int <- lm(
  data = vendor_end,
  fee1_full ~ hh_income_top_99*(BU + TD + Both) +
    block_id +
    as.factor(enum))
l4_2_hhincome_int <- lm(
  data = vendor_end,
  fee2_always ~ hh_income_top_99*(BU + TD + Both) +
    block_id +
    as.factor(enum))
l4_3_hhincome_int <- lm(
  data = vendor_end,
  recent_receipt_7 ~ hh_income_top_99*(BU + TD + Both) +
    block_id +
    as.factor(enum))

#### Tex Table ----
stargazer(l4_1_hhincome_int,
          l4_2_hhincome_int,
          l4_3_hhincome_int,
          se = list(calc.ses.cluster(l4_1_hhincome_int, "market", 
                                     data = vendor_end),
                    calc.ses.cluster(l4_2_hhincome_int, "market", 
                                     data = vendor_end),
                    calc.ses.cluster(l4_3_hhincome_int, "market", 
                                     data = vendor_end)),
          font.size = "scriptsize",
          title = "Wealth Heterogeneous Effects Analysis",
          header = FALSE,
          omit = c("block*", "as.factor*", "Constant"),
          table.placement = "!htbp",
          no.space = TRUE,
          keep.stat = c("adj.rsq", "n"),
          star.cutoffs = c(.05, .01, .001),
          dep.var.caption = '',
          label = "app:tab:het_te:wealth",
          dep.var.labels = c("\\makecell{Self-Reported\\\\Compliance}",
                             "\\makecell{Perceived Group\\\\Compliance}",
                             "\\makecell{Evidence of\\\\Recent Receipt}"),
          covariate.labels = c("HH Income", "BU", "TD", "BOTH", 
                               "HH Income * BU",
                               "HH Income * TD", 
                               "HH Income * BOTH"),
          model.numbers = FALSE,
          out = paste0(out_path, "tableL4.tex"))

## Appendix L.2 Market-Level Heterogeneity ----

### Creating Intermediate Objects for Analysis

#adding market data to market_lvl_diffs
market_lvl_diffs_het <- market_lvl_diffs |> 
  right_join(mkt_scoping_samp |> select(mktname, typicalmrktday_total_v,
                                         large_n),
             by = c("market" = "mktname"))
# for use with DIM models make endline only version of mktlvl
market_lvl_het_el <- market_lvl |>
  filter(Endline == 1) |> 
  right_join(mkt_scoping_samp |> select(mktname, typicalmrktday_total_v,
                                         large_n),
             by = c("market" = "mktname"))
#adding market size data to vendor_end
vendor_end_het <- vendor_end |> 
  full_join(market_lvl_diffs_het |>
              select(market, 
                     typicalmrktday_total_v),
            by = ("market" = "market"))

### Table L5: Market Size ----

#### Models ----

# Self-Reported Compliance
l51a <- lm(
  data = market_lvl_diffs_het, 
  fee1_full ~ typicalmrktday_total_v*(BU + TD + Both) + block_id)
l51b <- lm(
  data = market_lvl_het_el, 
  fee1_full ~ typicalmrktday_total_v*(BU + TD + Both) + block_id)
l51c <- lm(
  data = vendor_end_het, 
  fee1_full ~ typicalmrktday_total_v*(BU + TD + Both) +
    block_id +
    as.factor(enum))

# Perceived Group Compliance
l52a <- lm(
  data = market_lvl_diffs_het, 
  fee2_always ~ typicalmrktday_total_v*(BU + TD + Both) + block_id)
l52b <- lm(
  data = market_lvl_het_el, 
  fee2_always ~ typicalmrktday_total_v*(BU + TD + Both) + block_id)
l52c <- lm(
  data = vendor_end_het, 
  fee2_always ~ typicalmrktday_total_v*(BU + TD + Both) +
    block_id +
    as.factor(enum))

# Evidence of Recent Receipt
l53a <- lm(
  data = market_lvl_diffs_het, 
  recent_receipt_7 ~ typicalmrktday_total_v*(BU + TD + Both) + block_id)
l53b <- lm(
  data = market_lvl_het_el, 
  recent_receipt_7 ~ typicalmrktday_total_v*(BU + TD + Both) + block_id)
l53c <- lm(
  data = vendor_end_het, 
  recent_receipt_7 ~ typicalmrktday_total_v*(BU + TD + Both) +
    block_id +
    as.factor(enum))

#### Tex Table ----
# standard errors for stargazer
mkt_size_se <- list(NULL, NULL, 
                    calc.ses.cluster(l51c, "market", 
                                     data = vendor_end_het),
                    NULL, NULL,
                    calc.ses.cluster(l52c, "market", 
                                     data = vendor_end_het),
                    NULL, NULL,
                    calc.ses.cluster(l53c, "market", 
                                     data = vendor_end_het))

stargazer(l51a,
          l51b,
          l51c,
          l52a,
          l52b,
          l52c,
          l53a,
          l53b,
          l53c,
          se = mkt_size_se,
          omit = c("as.factor", "block", "Constant"),
          column.labels = rep(c("Mkt. DID", "Mkt DIM", "Ind. DIM"), 3),
          title = "Het. Treatment Effects by Market Size",
          model.numbers = F,
          notes = c("Individual-level models include enumerator", 
                    "and block fixed-effects.",
                    "Individual-level models have SEs clustered",
                    "on market.",
                    "Market-level models include block fixed-effects."),
          keep.stat = c("n", "adj.rsq"),
          notes.label = "Notes:",
          font.size = "scriptsize",
          dep.var.caption = '',
          dep.var.labels = c("\\makecell{Self-Reported\\\\Compliance}",
                             "\\makecell{Perceived Group\\\\Compliance}",
                             "\\makecell{Evidence of\\\\Recent Receipt}"),
          covariate.labels = c("Market Size", "BU", "TD", "BOTH", 
                               "Market Size * BU", "Market Size * TD", 
                               "Market Size * BOTH"),
          header = F,
          digits = 4,
          column.sep.width = "0pt",
          star.cutoffs = c(.05, .01, .001),
          label = "app:tab:het_te:mkt_size",
          out = paste0(out_path, "tableL5.tex"))


### Table L6: Collective Action Propensity ----

### Creating Collective Action Propensity Measure
col_act_df <- vendor_end |> 
  mutate(s15_num = as.numeric(s15),
         s15_num = abs(s15_num - max(s15_num, na.rm = T))) |> 
  group_by(market) |>
  summarize(col_act_prop = mean(s15_num, na.rm = T),
            n = sum(!is.na(s15_num)))
col_act_df$col_act_prop_scaled <- as.vector(scale(col_act_df$col_act_prop))
market_lvl_diffs_het <- full_join(market_lvl_diffs_het,
                                  col_act_df |> select(-n),
                              by = c("market" = "market"))

#### Models ----
l61_mkt_did_diff_col_act <- lm(
  data = market_lvl_diffs_het, 
  fee1_full ~ col_act_prop_scaled*(BU + TD + Both) + block_id)
l62_mkt_did_diff_col_act <- lm(
  data = market_lvl_diffs_het, 
  fee2_always ~ col_act_prop_scaled*(BU + TD + Both) + block_id)
l63_mkt_did_diff_col_act <- lm(
  data = market_lvl_diffs_het, 
  recent_receipt_7 ~ col_act_prop_scaled*(BU + TD + Both) + block_id)

#### Tex Table ----
stargazer(l61_mkt_did_diff_col_act,
          l62_mkt_did_diff_col_act,
          l63_mkt_did_diff_col_act,
          label = "app:tab:het_te:col_act_prop",
          title = "Het. Treatment Effects by Collective Action Propensity",
          model.numbers = F,
          font.size = "scriptsize",
          header = FALSE,
          omit = c("block*", "as.factor*", "Constant"),
          table.placement = "!htbp",
          no.space = TRUE,
          star.cutoffs = c(.05, .01, .001),
          keep.stat = c("adj.rsq", "n"),
          dep.var.caption = '',
          dep.var.labels = c("\\makecell{Self-Reported\\\\Compliance}",
                             "\\makecell{Perceived Group\\\\Compliance}",
                             "\\makecell{Evidence of\\\\Recent Receipt}"),
          covariate.labels = c("Collective Action Propensity", "BU", "TD", "BOTH", 
                               "Collective Action Propensity * BU", 
                               "Collective Action Propensity * TD", 
                               "Collective Action Propensity * BOTH"),
          notes = c("All models market-level DID models.",
                    "All models include block fixed-effects."),
          out = paste0(out_path, "tableL6.tex"))

# Appendix M: Linear Hypothesis Tests for Main Paper Models --------------------

lin_hyps <- c("BU = TD", "BU = Both", "TD = Both",
              "BU + TD = Both")
## Table M1 ----
main_mods_names <- c("\\makecell{Receipt Outcome\\\\(Individual DIM)}",
                     "\\makecell{Receipt Outcome\\\\(Market DID)}")

# For Main Paper Table 2
get_linhyps_ps_mods(
  list(r7_ind = h1_o3_r7,
       r7_did = h1_o3_r7_mkt_did_diff),
  lin_hyps = lin_hyps,
  cluster = list(vendor_end$market, NULL),
  correct = FALSE
) |> 
  print_linhyps_table(
    mod_names = main_mods_names,
    adj_only = FALSE,
    caption = "$p$-Values for Linear Hypothesis (Wald) Tests for Receipt Outcome Models in Table 2",
    label = "app:tab:lin_hyps:main",
    caption.placement = "top",
    include.rownames = FALSE,
    sanitize.text.function = sanitize_passthrough,
    size = "scriptsize",
    file = paste0(out_path, "tableM1.tex")
  )

## Table M2 ----
bu_mods_h4 <- list(h4_dg = h4_dg,
                   h4_wc = h4_wc)
bu_mods_h5 <- list(h5_tr9e = h5_tr9e,
                   h5_tr9g = h5_tr9g,
                   h5_tr9h = h5_tr9h)
bu_mods_list1 <- list(bu_mods_h4, bu_mods_h5)
bu_mods_names1 <- c("\\makecell{Trust in\\\\Local\\\\Government}",
                    "\\makecell{Trust in\\\\Ward\\\\Councilors}",
                    "\\makecell{District\\\\Manages\\\\Funds Well}",
                    "\\makecell{District\\\\Spending\\\\Transparent}",
                    "\\makecell{District\\\\Tax Collection\\\\Transparent}")

# For Main Paper Table 3
map(bu_mods_list1, \(x) get_linhyps_ps_mods(x, lin_hyps = lin_hyps,
                                            clusters = list(vendor_end$market),
                                            correct = FALSE)) |> 
  list_rbind() |> 
  print_linhyps_table(
    mod_names = bu_mods_names1,
    caption = "$p$-Values for Linear Hypothesis (Wald) Tests for Bottom-Up Mechanism Models in Table 3",
    label = "app:tab:lin_hyps:bu1",
    caption.placement = "top",
    include.rownames = FALSE,
    sanitize.text.function = sanitize_passthrough,
    size = "scriptsize",
    file = paste0(out_path, "tableM2.tex")
  )

## Table M3 ----
bu_mods_h6 <- list(h6_ms10 = h6_ms10,
                   h6_ms1 = h6_ms1,
                   h6_tc2_10 = h6_tc2_10)
bu_mods_h7 <- list(h7_taxduty = h7_taxduty,
                   h7_tax_even_disagree = h7_tax_even_disagree)
bu_mods_list2 <- list(bu_mods_h6, bu_mods_h7)
bu_mods_names2 <- c("\\makecell{Satisfaction\\\\with\\\\Services}",
                    "\\makecell{Satisfaction\\\\with\\\\Water Access}",
                    "\\makecell{Perceptions\\\\of Spending\\\\on Services}",
                    "\\makecell{Paying\\\\Taxes\\\\Is Duty}",
                    "\\makecell{Vendors\\\\Should\\\\Pay Taxes}")

# For Main Paper Table 4
map(bu_mods_list2, \(x) get_linhyps_ps_mods(x, lin_hyps = lin_hyps,
                                            clusters = list(vendor_end$market),
                                            correct = FALSE)) |> 
  list_rbind() |> 
  print_linhyps_table(
    mod_names = bu_mods_names2,
    caption = "$p$-Values for Linear Hypothesis (Wald) Tests for Bottom-Up Mechanism Models in Table 4",
    label = "app:tab:lin_hyps:bu2",
    caption.placement = "top",
    include.rownames = FALSE,
    sanitize.text.function = sanitize_passthrough,
    size = "scriptsize",
    file = paste0(out_path, "tableM3.tex")
  )

## Table M4 ----
h8_mods <- list(vote_intend = h8_election3,
                petition = h8_p1_el,
                petition_name = h8_p2_el,
                stmt1_agree = h8_st1_el,
                stmt2_agree = h8_st2_el)

h8_mods_names <- c(
  "Vote",
  "\\makecell{Sign Petition\\\\Anonymously}",
  "\\makecell{Sign Petition\\\\with Name}",
  "\\makecell{Reduce Gov.\\\\Over-reach}",
  "\\makecell{Increase Gov.\\\\Effectiveness}"
)

# For Main Paper Table 5
get_linhyps_ps_mods(h8_mods,
                    lin_hyps = lin_hyps,
                    clusters = list(vendor_end$market),
                    correct = FALSE) |> 
  print_linhyps_table(
    mod_names = h8_mods_names,
    caption = "Linear Hypothesis (Wald) Tests for Political Engagement Outcome Models in Table 5",
    label = "app:tab:lin_hyps:pol-eng",
    caption.placement = "top",
    include.rownames = FALSE,
    sanitize.text.function = sanitize_passthrough,
    size = "scriptsize",
    file = paste0(out_path, "tableM4.tex")
  )

## Table M5 ----
td_mods_h9 <- list(h9_tc5a = h9_tc5a,
                   h9_tc5b = h9_tc5b,
                   h9_tc2_15b = h9_tc2_15b) |> 
  get_linhyps_ps_mods(lin_hyps = lin_hyps,
                      clusters = list(vendor_end$market),
                      correct = FALSE)

td_mods_h10 <- list(h10_tc9 = h10_tc9) |> 
  get_linhyps_ps_mods(lin_hyps = lin_hyps,
                      clusters = list(vendor_end$market),
                      correct = FALSE)

td_mods_h11 <- list(h11_hrs_in_mkt = h11_hrs_in_mkt,
                    h11_vendors_visited = h11_vendors_visited) |> 
  get_linhyps_ps_mods(lin_hyps = lin_hyps,
                      clusters = list(tax_collector_end$market),
                      correct = FALSE)

td_mods_names <- c(
  "\\makecell{Ind'l Evasion\\\\Possible}",
  "\\makecell{Group Evasion\\\\Possible}",
  "\\makecell{Pay Because\\\\Consequences}",
  "\\makecell{Money Flowing\\\\to Gov't}",
  "\\makecell{Hours Working\\\\in Market\\\\per Day}",
  "\\makecell{Vendors Visited\\\\in Market\\\\per Day}"
)

# For Main Paper Tables 6-7
list(td_mods_h9, td_mods_h10, td_mods_h11) |> 
  list_rbind() |> 
  print_linhyps_table(
    mod_names = td_mods_names,
    caption = "$p$-Values for Linear Hypothesis (Wald) Tests for Top-Down Mechanism Models in Tables 6-7",
    label = "app:tab:lin_hyps:td",
    caption.placement = "top",
    include.rownames = FALSE,
    sanitize.text.function = sanitize_passthrough,
    size = "scriptsize",
    file = paste0(out_path, "tableM5.tex")
  )  

# Appendix N: General Robustness Models ----------------------------------------

## Appendix N.1: Main Outcomes ----

### Figure N1 ----

#### Necessary Data Manipulation ----

# arrange in descending order
market_lvl_diffs_n1 <- market_lvl_diffs |> 
  arrange(desc(recent_receipt_7))

# adding r7 proportions in to help calculate std error
r7 <- market_lvl |> 
  select(market, recent_receipt_7, c_recent_receipt, Endline) |> 
  mutate(Endline = if_else(Endline == 1, "Endline", "Baseline")) |> 
  pivot_wider(values_from = c(recent_receipt_7, c_recent_receipt),
              names_from = Endline)
market_lvl_diffs_n1 <- market_lvl_diffs_n1 |> 
  left_join(r7, by = "market") |> 
  rename(r7_end = recent_receipt_7_Endline,
         r7_base = recent_receipt_7_Baseline,
         r7_c_end = c_recent_receipt_Endline,
         r7_c_base = c_recent_receipt_Baseline)

# adding 95% confidence intervals for difference in proportion
market_lvl_diffs_n1 <- market_lvl_diffs_n1 |> 
  mutate(r7_std_error = sqrt((r7_end * (1 - r7_end)) / r7_c_end +
                               (r7_base * (1 - r7_base)) / r7_c_base),
         r7_lower = recent_receipt_7 - qnorm(0.95) * r7_std_error,
         r7_upper = recent_receipt_7 + qnorm(0.95) * r7_std_error)

# anonymizing market
market_lvl_diffs_n1 <- market_lvl_diffs_n1 |>
  mutate(market = factor(market, levels = market),
         market = factor(as.numeric(market), 
                         levels = as.numeric(market)))

#### Figure ----

# defining common axis tick size 
axis_tick_size <- 7

# get max name length
long_name_length <- max(sapply(as.numeric(market_lvl_diffs_n1$market), nchar))

# Control
control_r7_diff_anon <- market_lvl_diffs_n1 |> 
  filter(BU_treat == 0 & TD_treat == 0) |> 
  ggplot(aes(x = recent_receipt_7, 
             y = market)) +
  geom_point(aes(shape = District)) +
  geom_errorbarh(aes(xmin = r7_lower, xmax = r7_upper,
                     linetype = District),
                 linewidth = 0.2, height = 0, na.rm = TRUE) +
  theme_bw() +
  labs(title = "Control",
       y ="Market", 
       x = "Difference in Proporton of Respondents Presenting Receipt (Endline - Baseline)",
       shape = "District",
       linetype = "District") +
  geom_vline(xintercept = 0, color = "red") +
  xlim(c(-0.5, 1)) +
  scale_y_discrete(labels = function(x) formatC(x, width = long_name_length))  +
  theme(axis.text = element_text(size = axis_tick_size,
                                 family = "mono"),
        plot.title = element_text(hjust = 0.5)) +
  scale_shape_manual(values = 1:8)


# Bottom Up
BU_r7_diff_anon <- market_lvl_diffs_n1 |> 
  filter(BU == 1) |>
  ggplot(aes(x = recent_receipt_7, 
             y = market)) +
  geom_point(aes(shape = District)) +
  geom_errorbarh(aes(xmin = r7_lower, xmax = r7_upper,
                     linetype = District),
                 linewidth = 0.2, height = 0, na.rm = TRUE) +
  theme_bw() +
  scale_y_discrete(labels = function(x) formatC(x, width = long_name_length)) +
  labs(title = "BU",
       y = "Market", 
       x = "Difference in Proporton of Respondents Presenting Receipt (Endline - Baseline)",
       shape = "District",
       linetype = "District"
  ) +
  geom_vline(xintercept = 0, color = "red") +
  xlim(c(-0.5, 1)) +
  theme(axis.text = element_text(size = axis_tick_size,
                                 family = "mono"),
        plot.title = element_text(hjust = 0.5)) +
  scale_shape_manual(values = 1:8)


# Top Down
TD_r7_diff_anon <- market_lvl_diffs_n1 |> 
  filter(TD == 1) |> 
  ggplot(aes(x = recent_receipt_7, 
             y = market)) +
  geom_point(aes(shape = District)) +
  geom_errorbarh(aes(xmin = r7_lower, xmax = r7_upper,
                     linetype = District),
                 linewidth = 0.2, height = 0, na.rm = TRUE) +
  theme_bw() +
  labs(title = "TD",
       y = "Market", 
       x = "Difference in Proporton of Respondents Presenting Receipt (Endline - Baseline)",
       shape = "District",
       linetype = "District") +
  scale_y_discrete(labels = function(x) formatC(x, width = long_name_length)) +
  geom_vline(xintercept = 0, color = "red") +
  xlim(c(-0.5, 1)) +
  theme(axis.text = element_text(size = axis_tick_size,
                                 family = "mono"),
        plot.title = element_text(hjust = 0.5)) +
  scale_shape_manual(values = 1:8)


# Both
Both_r7_diff_anon <- market_lvl_diffs_n1 |> 
  filter(Both == 1) |> 
  ggplot(aes(x = recent_receipt_7, 
             y = market)) +
  geom_point(aes(shape = District)) +
  geom_errorbarh(aes(xmin = r7_lower, xmax = r7_upper,
                     linetype = District),
                 linewidth = 0.2, height = 0, na.rm = TRUE) +
  theme_bw() +
  labs(title = "Both",
       y = "Market", 
       x = "Difference in Proporton of Respondents Presenting Receipt (Endline - Baseline)",
       shape = "District",
       linetype = "District") +
  geom_vline(xintercept = 0, color = "red") +
  xlim(c(-0.5, 1)) +
  scale_y_discrete(labels = function(x) formatC(x, width = long_name_length)) +
  theme(axis.text = element_text(size = axis_tick_size,
                                 family = "mono"),
        plot.title = element_text(hjust = 0.5)) +
  scale_shape_manual(values = 1:8)

n1 <- control_r7_diff_anon + BU_r7_diff_anon +
  TD_r7_diff_anon + Both_r7_diff_anon +
  plot_layout(guides = "collect",
              axis_titles = "collect")

ggsave(filename = paste0(out_path, "figureN1.png"),
       plot = n1,
       width = 10,
       height = 8,
       units = "in")

### Figure N2 ----
diffs_plot_data <- market_lvl_diffs |> group_by(treatment_status) |> 
  summarise(r7_mean = mean(recent_receipt_7),
            se = sd(recent_receipt_7)/sqrt(n()),
            UB = r7_mean + qt(.975, n() - 1) * se,
            LB = r7_mean - qt(.975, n() - 1) * se) |> 
  mutate(treatment_status = factor(treatment_status,
                                   levels = c("Control", "BU", "TD", "BOTH")))
market_lvl_diffs$treatment_status <- factor(market_lvl_diffs$treatment_status,
                                            levels = c("Control", "BU", "TD", "BOTH"))

#set seed to make jitter be the same each time
set.seed(1234)
n2 <- ggplot(market_lvl_diffs) +
  geom_jitter(aes(x = treatment_status, y = recent_receipt_7),
              height = 0,
              width = 0.1) +
  geom_point(aes(x = treatment_status, y = r7_mean), data = diffs_plot_data,
             size = 3,
             alpha = .3) +
  geom_errorbar(aes(x = treatment_status, ymin = LB,
                    ymax = UB),
                data = diffs_plot_data, 
                width = .3,
                linewidth = 1.25,
                alpha = .5) + 
  theme_bw() +
  labs(x = "Treatment Status",
       y = "Diff. in Proportion Able to Show Evidence \n of Receipt from Within 7 Days") +
  geom_hline(yintercept = 0)
ggsave(filename = paste0(out_path, "figureN2.png"),
       plot = n2,
       width = 6, height = 4, unit = "in")

### Appendix N.1.1: Main Outcomes - Self-Reported and Perceived Group ----

#### Models ----

### Individual-Level DIM
h1_o1full <- lm(data = vendor_end, fee1_full ~ BU + TD + Both + 
                          block_id + as.factor(enum))
h1_o2always <- lm(data = vendor_end, fee2_always ~ BU + TD + Both + 
                            block_id + as.factor(enum))
# h1_o3_r7 in main models

### Market-Level DID
h1_o1full_mkt_did_diff <- lm(data = market_lvl_diffs,
                             fee1_full ~ BU + TD + Both + block_id)
h1_o2always_mkt_did_diff <- lm(data = market_lvl_diffs,
                               fee2_always ~ BU + TD + Both + block_id)
# h1_o3_r7_mkt_did_diff in main models

#### Tex Table ----

# able N1: Hypothesis 1 Results Table - Individual-Level DIM and Market-Level DID

#panel A
stargazer(h1_o1full,
          h1_o2always,
          h1_o3_r7,
          se = list(calc.ses.cluster(h1_o1full, "market", 
                                     data = vendor_end),
                    calc.ses.cluster(h1_o2always, "market", 
                                     data = vendor_end),
                    calc.ses.cluster(h1_o3_r7, "market", 
                                     data = vendor_end)),
          keep = c("BU", "TD", "Both"),
          font.size = "footnotesize",
          title = "Panel A: Individual-Level DIM Models",
          label = "tab:main",
          model.numbers = F,
          omit.table.layout = "n",
          keep.stat = c("n", "adj.rsq"),
          dep.var.labels = c("\\makecell{Self-Reported\\\\Full Tax Compliance}",
                             "\\makecell{Perception of\\\\Others' Always Complying}",
                             "\\makecell{Evidence of Receipt\\\\from Past 7 Days}"),
          dep.var.caption = "",
          star.cutoffs = c(.05, .01, .001),
          header = F,
          out = paste0(out_path, "tableN1_panelA.tex"))
#panel B
stargazer(h1_o1full_mkt_did_diff,
          h1_o2always_mkt_did_diff,
          h1_o3_r7_mkt_did_diff,
          keep = c("BU", "TD", "Both"),
          font.size = "footnotesize",
          title = "Panel B: Market-Level DID Models",
          model.numbers = F,
          notes = c("Individual-level models include enumerator and block fixed-effects",
                    "Individual-level models have SEs clustered on market.",
                    "Market-level models include block fixed-effects."),
          keep.stat = c("n", "adj.rsq"),
          dep.var.labels = c("\\makecell{Self-Reported\\\\Full Tax Compliance}",
                             "\\makecell{Perception of\\\\Others' Always Complying}",
                             "\\makecell{Evidence of Receipt\\\\from Past 7 Days}"),
          dep.var.caption = "",
          star.cutoffs = c(.05, .01, .001),
          notes.label = "Notes",
          header = F,
          out = paste0(out_path, "tableN1_panelB.tex"))


### Appendix N.1.2: Other Main Outcome Specifications ----

#### Data Manipulation ----

##### Creating Endline Version of market_lvl ----
market_lvl_el <- market_lvl |> 
  filter(Endline == 1)

##### Merging Baseline Averages (for DID 1) ----

###### Vendor -----
#Creating Market Averages of Baseline Data, Then Merging with Endline
mkt_avg_ven <- vendor_base |> group_by(market) |> 
  summarise(fee1_full_bl_avg = mean(fee1_full, na.rm = T),
            fee2_always_bl_avg = mean(fee2_always, na.rm = T),
            recent_receipt_7_bl_avg = mean(recent_receipt_7, na.rm = T),
            recent_receipt_7_ofcl_bl_avg = mean(recent_receipt_7_ofcl, na.rm = T),
            recent_receipt_7_v2a_bl_avg = mean(recent_receipt_7_v2a, na.rm = T),
            recent_receipt_7_v2b_bl_avg = mean(recent_receipt_7_v2b, na.rm = T),
            recent_receipt_10_bl_avg = mean(recent_receipt_10, na.rm = T),
            recent_receipt_10_ofcl_bl_avg = mean(recent_receipt_10_ofcl, na.rm = T),
            recent_receipt_10_v2a_bl_avg = mean(recent_receipt_10_v2a, na.rm = T),
            recent_receipt_10_v2b_bl_avg = mean(recent_receipt_10_v2b, na.rm = T),
            tr1_bl_avg = mean(tr1_num, na.rm = T),
            tr2_bl_avg = mean(tr2_num, na.rm = T),
            tr9e_bl_avg = mean(tr9e_num, na.rm = T),
            ms1_bl_avg = mean(ms1_num, na.rm = T),
            ms4_bl_avg = mean(ms4_num, na.rm = T),
            ms3_bl_avg = mean(ms3_num, na.rm = T),
            ms5_bl_avg = mean(ms5_num, na.rm = T),
            ms6_bl_avg = mean(ms6_num, na.rm = T),
            satisfaction_dev_bl_avg = mean(satisfaction_dev_num, na.rm = T),
            tax_morale_bl_avg = mean(tax_morale_num, na.rm = T),
            tc2_4b_bl_avg = mean(tc2_4b_num, na.rm = T),
            tc5a_bl_avg = mean(tc5a_num, na.rm = T),
            tc5b_bl_avg = mean(tc5b_num, na.rm = T),
            tc2_15b_bl_avg = mean(tc2_15b_num, na.rm = T),
            tc2_10_bl_avg = mean(tc2_10_clean, na.rm = T),
            pay_even_disagree_bl_avg = mean(pay_even_disagree, na.rm = T),
            no_rcpt_when_pay_bl_avg = mean(no_rcpt_when_pay_num, na.rm = T),
            no_rcpt_when_pay_uncommon_bl_avg = mean(no_rcpt_when_pay_num > 2, 
                                                    na.rm = T),
            sp1_yn_avg = mean(sp1_yn, na.rm = T),
            sp3_yn_avg = mean(sp3_yn, na.rm = T),
            sell_in_othr_mkts_avg = mean(sell_in_othr_mkts, na.rm = T))

#merge with vendor_end
vendor_end <- left_join(vendor_end, 
                        mkt_avg_ven, by = c("market" = "market"))

###### Tax Collector ----

#create market "average" then merge with endline data
mkt_avg_tax <- tax_collector_base |> group_by(market) |> 
  summarise(hrs_in_mkt_bl_avg = mean(hrs_in_mkt, na.rm = T),
            vendors_visited_trim_99_bl_avg = mean(vendors_visited_trim_99, 
                                                  na.rm = T))

# merge with tax_collector_end
tax_collector_end <- left_join(tax_collector_end, mkt_avg_tax, 
                               by = c("market" = "market"))

##### Rowbinding Endline and Baseline (for DID 2) ----

###### Vendor ----
#create Endline indicator in both dataframes
vendor_end$Endline <- 1
vendor_base$Endline <- 0

#we retain only variables of interest for DID, as otherwise the merge process
#between endline and baseline becomes too complicated, as there are numerous 
#type mismatches between the two versions of the survey
vendor_all <- bind_rows(vendor_base |> 
                          select(#admin vars
                            district, market, enum, sup, BU_treat, TD_treat,
                            BU, TD, Both, treatment_status, Endline, 
                            block_id,
                            #demo vars
                            age, female, service, sell_daily,
                            #outcome vars
                            fee1_full, fee2_always, recent_receipt_7, 
                            recent_receipt_7_ofcl, recent_receipt_10,
                            recent_receipt_10_ofcl,
                            no_rcpt_when_pay, no_rcpt_when_pay_num,
                            tr1_num, tr2_num,
                            tr9e_num, ms1_num, ms3_num,
                            ms4_num, ms5_num, ms6_num, satisfaction_dev_num, 
                            tc2_10_clean,
                            pay_even_disagree, tc2_4b_num, tc5a_num, 
                            tc5b_num, tc2_15b_num,
                            fee1_full, fee2_always, recent_receipt_7, 
                            tr1_clean, tr2_clean,
                            tr9e_clean, ms1_clean, 
                            ms3_clean, ms4_clean, ms5_clean, ms6_clean, 
                            satisfaction_dev, 
                            pay_even_disagree, tc2_4b_clean, tc5a_clean, 
                            tc5b_clean, tc2_15b_clean,
                            #spillover vars
                            sp1_yn, sp3_yn, sell_in_othr_mkts), 
                        vendor_end |> 
                          select(#admin vars
                            district, market, enum, sup, BU_treat, TD_treat,
                            BU, TD, Both, treatment_status, Endline, 
                            block_id,
                            #demo vars
                            age, female, service, sell_daily,
                            #outcome vars
                            fee1_full, fee2_always, recent_receipt_7,
                            recent_receipt_7_ofcl, recent_receipt_10,
                            recent_receipt_10_ofcl,
                            no_rcpt_when_pay, no_rcpt_when_pay_num,
                            tr1_num, tr2_num,
                            tr9e_num, tr9g_num, tr9h_num, ms1_num, ms3_num,
                            ms4_num, ms5_num, ms6_num, satisfaction_dev_num, 
                            tc2_10_clean,
                            pay_even_disagree, tc2_4b_num, tc5a_num, 
                            tc5b_num, tc2_15b_num,
                            fee1_full, fee2_always, recent_receipt_7, 
                            tr1_clean, tr2_clean,
                            tr9e_clean, tr9g_clean, tr9h_clean, ms1_clean, 
                            ms3_clean, ms4_clean, ms5_clean, ms6_clean, 
                            satisfaction_dev, 
                            pay_even_disagree, tc2_4b_clean, tc5a_clean, 
                            tc5b_clean, tc2_15b_clean, tc9_clean,
                            #spillover vars
                            sp1_yn, sp3_yn, sell_in_othr_mkts))


###### Tax Collector -----
#create Endline indicator in both dataframes
tax_collector_base$Endline <- 0
tax_collector_end$Endline <- 1

tax_collector_all <- bind_rows(tax_collector_base |> 
                                 select(#admin vars
                                   district, market, enum, sup, BU_treat, TD_treat,
                                   BU, TD, Both, treatment_status, Endline,
                                   block_id,
                                   #outcome vars
                                   vendors_visited, vendors_visited_trim_99,
                                   vendors_visited_top_99, hrs_in_mkt),
                               tax_collector_end |> 
                                 select(#admin vars
                                   district, market, enum, sup, BU_treat, TD_treat,
                                   BU, TD, Both, treatment_status, Endline,
                                   block_id,
                                   #outcome vars
                                   vendors_visited, vendors_visited_trim_99,
                                   vendors_visited_top_99, hrs_in_mkt))

#### Table N2: Ability to Produce a Receipt Robustness Models ----

##### Models ----
h1_o3_r7_mkt_dim <- lm(data = market_lvl_el, recent_receipt_7 ~ BU + TD + Both + 
                         block_id)
h1_o3_r7_mkt_did <- lm(data = market_lvl, 
                       recent_receipt_7 ~ Endline + BU + TD + Both + Endline:BU +
                         Endline:TD + Endline:Both + block_id)
h1_o3_r7_ind_did1 <- lm(data = vendor_all, 
                        recent_receipt_7 ~ Endline + BU + TD + Both + Endline:BU +
                          Endline:TD + Endline:Both + block_id)
h1_o3_r7_ind_did2 <- lm(data = vendor_end,
                        recent_receipt_7 ~ BU + TD + Both + recent_receipt_7_bl_avg + 
                          block_id + enum)

##### Tex Table ----
stargazer(h1_o3_r7_mkt_dim,
          h1_o3_r7_mkt_did,
          h1_o3_r7_ind_did1,
          h1_o3_r7_ind_did2,
          se = list(NULL,
                    NULL,
                    calc.ses.cluster(h1_o3_r7_ind_did1, "market", 
                                     data = vendor_all),
                    calc.ses.cluster(h1_o3_r7_ind_did2, "market", 
                                     data = vendor_end)),
          keep = c("BU", "TD", "Both", "recent_receipt_7_bl_avg"),
          column.labels = c("Market DIM", "Market DID", "Individual DID",
                            "\\makecell{Individual DID\\\\Incl. Lagged DV}"),
          title = "Ability to Produce a Receipt Robustness Models",
          model.numbers = F,
          notes = c("Individual-level models include enumerator", 
                    "and block fixed-effects.",
                    "Individual-level models have SEs clustered",
                    "on market.",
                    "Market-level models include block fixed-effects."),
          keep.stat = c("n", "adj.rsq"),
          notes.label = "Notes:",
          font.size = "footnotesize",
          dep.var.caption = '',
          star.cutoffs = c(.05, .01, .001),
          dep.var.labels = c("\\makecell{Evidence of Receipt\\\\from Past 7 Days}"),
          header = F,
          label = "app:tab:o3_robust",
          out = paste0(out_path, "tableN2.tex"))

#### Table N3: Self-Reported Compliance Robustness Models ----

##### Models ----
n3_mkt_dim <- lm(data = market_lvl_el, fee1_full ~ BU + TD + Both + 
                          block_id)
n3_mkt_did <- lm(data = market_lvl, 
                        fee1_full ~ Endline + BU + TD + Both + Endline:BU +
                          Endline:TD + Endline:Both + block_id)
n3_ind_did1 <- lm(data = vendor_all, 
                         fee1_full ~ Endline + BU + TD + Both + Endline:BU +
                           Endline:TD + Endline:Both + block_id)
n3_ind_did2 <- lm(data = vendor_end,
                         fee1_full ~ BU + TD + Both + fee1_full_bl_avg + 
                           block_id + enum)

##### Tex Table ----
stargazer(n3_mkt_dim,
          n3_mkt_did,
          n3_ind_did1,
          n3_ind_did2,
          se = list(NULL,
                    NULL,
                    calc.ses.cluster(n3_ind_did1, "market", 
                                     data = vendor_all),
                    calc.ses.cluster(n3_ind_did2, "market", 
                                     data = vendor_end)),
          keep = c("BU", "TD", "Both", "fee1_full_bl_avg"),
          column.labels = c("Market DIM", "Market DID", "Individual DID",
                            "\\makecell{Individual DID\\\\Incl. Lagged DV}"),
          title = "Self-Reported Tax Compliance Robustness Models",
          model.numbers = F,
          notes = c("Individual-level models include enumerator", 
                    "and block fixed-effects.",
                    "Individual-level models have SEs clustered",
                    "on market.",
                    "Market-level models include block fixed-effects."),
          keep.stat = c("n", "adj.rsq"),
          notes.label = "Notes:",
          dep.var.caption = '',
          star.cutoffs = c(.05, .01, .001),
          dep.var.labels = c("\\makecell{Self-Reported\\\\Full Compliance}"),
          header = F,
          font.size = "footnotesize",
          label = "app:tab:o1_robust",
          out = paste0(out_path, "tableN3.tex"))

#### Table N4: Perceived Group Tax Compliance Robustness Models ----

##### Models ----
n4_mkt_dim <- lm(data = market_lvl_el, fee2_always ~ BU + TD + Both + 
                            block_id)
n4_mkt_did <- lm(data = market_lvl, 
                          fee2_always ~ Endline + BU + TD + Both + Endline:BU +
                            Endline:TD + Endline:Both + block_id)
n4_ind_did1 <- lm(data = vendor_all, 
                           fee2_always ~ Endline + BU + TD + Both + Endline:BU +
                             Endline:TD + Endline:Both + block_id)
n4_ind_did2 <- lm(data = vendor_end,
                           fee2_always ~ BU + TD + Both + fee2_always_bl_avg + 
                             block_id + enum)

##### Tex Table ----
stargazer(n4_mkt_dim,
          n4_mkt_did,
          n4_ind_did1,
          n4_ind_did2,
          se = list(NULL,
                    NULL,
                    calc.ses.cluster(n4_ind_did1, "market", 
                                     data = vendor_all),
                    calc.ses.cluster(n4_ind_did2, "market", 
                                     data = vendor_end)),
          keep = c("BU", "TD", "Both", "fee2_always_bl_avg"),
          column.labels = c("Market DIM", "Market DID", "Individual DID",
                            "\\makecell{Individual DID\\\\Incl. Lagged DV}"),
          title = "Perceived Group Tax Compliance Robustness Models",
          model.numbers = F,
          notes = c("Individual-level models include enumerator", 
                    "and block fixed-effects.",
                    "Individual-level models have SEs clustered",
                    "on market.",
                    "Market-level models include block fixed-effects."),
          keep.stat = c("n", "adj.rsq"),
          notes.label = "Notes:",
          dep.var.caption = '',
          star.cutoffs = c(.05, .01, .001),
          dep.var.labels = c("\\makecell{Group-Perception of\\\\Always Complying}"),
          header = F,
          font.size = "footnotesize",
          label = "app:tab:o2_robust",
          out = paste0(out_path, "tableN4.tex"))

### Appendix N.1.3: Interacting BU and TD Treatment Assignment ----

#### Table N5 ----

##### Models ----
#interactions
n51_int <- lm(data = vendor_end, fee1_full ~ BU_treat + TD_treat
                            + BU_treat*TD_treat + block_id + as.factor(enum))

n52_int <- lm(data = vendor_end, fee2_always ~ BU_treat + TD_treat
                              + BU_treat*TD_treat + block_id + as.factor(enum))

n53_int <- lm(data = vendor_end, recent_receipt_7 ~ BU_treat + TD_treat
                           + BU_treat*TD_treat + block_id + as.factor(enum))

#no interactions
n51_noint <- lm(data = vendor_end, fee1_full ~ BU_treat + TD_treat
                              + block_id + as.factor(enum))

n52_noint <- lm(data = vendor_end, fee2_always ~ BU_treat + TD_treat
                                + block_id + as.factor(enum))

n53_noint <- lm(data = vendor_end, recent_receipt_7 ~ BU_treat + TD_treat
                             + block_id + as.factor(enum))

##### Tex Table ----
stargazer(n51_int,
          n51_noint,
          n52_int,
          n52_noint,
          n53_int,
          n53_noint,
          se = list(calc.ses.cluster(n51_int, "market", 
                                     data = vendor_end),
                    calc.ses.cluster(n51_noint, "market", 
                                     data = vendor_end),
                    calc.ses.cluster(n52_int, "market", 
                                     data = vendor_end),
                    calc.ses.cluster(n52_noint, "market", 
                                     data = vendor_end),
                    calc.ses.cluster(n53_int, "market", 
                                     data = vendor_end),
                    calc.ses.cluster(n53_noint, "market", 
                                     data = vendor_end)),
          keep = c("BU_treat", "TD_treat"),
          title = "Analysis as Factorial Design (w/ Int., no Int.)",
          model.numbers = F,
          notes = c("Individual-level models include enumerator", 
                    "and block fixed-effects.",
                    "Individual-level models have SEs clustered",
                    "on market."),
          keep.stat = c("n", "adj.rsq"),
          notes.label = "Notes:",
          dep.var.caption = '',
          star.cutoffs = c(.05, .01, .001),
          dep.var.labels = c("\\makecell{Self-Reported\\\\Full Compliance}",
                             "\\makecell{Self-Reported\\\\Always Complying}",
                             "\\makecell{Evidence of Receipt\\\\from Past 7 Days}"),
          header = F,
          label = "app:tab:n5int_noint",
          font.size = "footnotesize",
          covariate.labels = c("BU", "TD", "BU:TD"),
          out = paste0(out_path, "tableN5.tex"))

### Appendix N.1.4 ----

# Commented out because models fit but table not included in appendix

#### Models Code
# h1_o1full_0s <- lm(data = vendor_end, fee1_full_0s ~ BU + TD + Both + 
#                              block_id)
# h1_o1full_mkt_dim_0s <- lm(data = market_lvl_el, fee1_full_0s ~ BU + TD + Both + 
#                              block_id)
# h1_o1full_mkt_did_0s <- lm(data = market_lvl, 
#                            fee1_full_0s ~ Endline + BU + TD + Both + Endline:BU +
#                              Endline:TD + Endline:Both + block_id)
# h1_o2always_0s <- lm(data = vendor_end, fee2_always_0s ~ BU + TD + Both + 
#                                block_id + as.factor(enum))
# h1_o2always_mkt_dim_0s <- lm(data = market_lvl_el, fee2_always_0s ~ BU + TD + Both + 
#                                block_id)
# h1_o2always_mkt_did_0s <- lm(data = market_lvl, 
#                              fee2_always_0s ~ Endline + BU + TD + Both + Endline:BU +
#                                Endline:TD + Endline:Both + block_id)

#### Tex Table Code
# stargazer(h1_o1full_0s,
#           h1_o1full_mkt_dim_0s,
#           h1_o1full_mkt_did_0s,
#           h1_o2always_0s,
#           h1_o2always_mkt_dim_0s,
#           h1_o2always_mkt_did_0s,
#           se = list(calc.ses.cluster(h1_o1full_0s, "market", 
#                                      data = vendor_end),
#                     NULL,
#                     NULL,
#                     calc.ses.cluster(h1_o2always_0s, "market", 
#                                      data = vendor_end),
#                     NULL,
#                     NULL),
#           keep = c("BU", "TD", "Both"),
#           column.labels = c("Individual DIM", "Market DIM", "Market DID",
#                             "Individual DIM", "Market DIM", "Market DID"),
#           title = "H1: Self-Reported and Group-Perceived Tax Compliance (Incl. All 0s)",
#           model.numbers = F,
#           notes = c("Individual-level models include enumerator", 
#                     "and block fixed-effects.",
#                     "Individual-level models have SEs clustered",
#                     "on market.",
#                     "Market-level models include block fixed-effects."),
#           keep.stat = c("n", "adj.rsq"),
#           notes.label = "Notes:",
#           dep.var.caption = '',
#           star.cutoffs = c(.05, .01, .001),
#           dep.var.labels = c("\\makecell{Self-Reported\\\\Full Compliance}",
#                              "\\makecell{Perception of Others'\\\\Always Complying}"),
#           #no.space = T,
#           header = F,
#           font.size = "footnotesize",
#           label = "app:tab:main_0s")

### Appendix N.1.5: Alternative Outcomes ---- 

#### Table N6: Evidence of Receipt from Past 10 Days ----

##### Models ----
h1_o3_r10 <- lm(data = vendor_end, recent_receipt_10 ~ BU + TD + Both + 
                          block_id + as.factor(enum))
h1_o3_r10_mkt_dim <- lm(data = market_lvl_el, recent_receipt_10 ~ BU + TD + Both + 
                          block_id)
h1_o3_r10_mkt_did <- lm(data = market_lvl, 
                        recent_receipt_10 ~ Endline + BU + TD + Both + Endline:BU +
                          Endline:TD + Endline:Both + block_id)

##### Tex Table ----
stargazer(h1_o3_r10,
          h1_o3_r10_mkt_dim,
          h1_o3_r10_mkt_did,
          se = list(calc.ses.cluster(h1_o3_r10, "market", 
                                     data = vendor_end),
                    NULL,
                    NULL),
          keep = c("BU", "TD", "Both"),
          column.labels = c("Individual DIM", "Market DIM", "Market DID"),
          title = "H1: Evidence of Receipt from Past 10 Days",
          model.numbers = F,
          notes = c("Individual-level models include enumerator", 
                    "and block fixed-effects.",
                    "Individual-level models have SEs clustered",
                    "on market.",
                    "Market-level models include block fixed-effects."),
          keep.stat = c("n", "adj.rsq"),
          notes.label = "Notes:",
          dep.var.labels = "Evidence of Receipt from Past 10 Days",
          dep.var.caption = '',
          star.cutoffs = c(.05, .01, .001),
          label = "app:tab:rr10",
          font.size = "footnotesize",
          header = F,
          out = paste0(out_path, "tableN6.tex"))


#### Table N7: Tax Collector Does Not Give Receipt ----

##### Models ----
h1_o3_norcpt <- lm(data = vendor_end, 
                           as.numeric(no_rcpt_when_pay_num > 2) ~ BU + TD +
                             Both + 
                             block_id + as.factor(enum))
h1_o3_norcpt_ind_did1 <- lm(data = vendor_all, 
                            as.numeric(no_rcpt_when_pay_num > 2) ~ Endline + BU
                            + TD + Both + Endline:BU +
                              Endline:TD + Endline:Both + block_id)
h1_o3_norcpt_ind_did2 <- lm(data = vendor_end,
                            as.numeric(no_rcpt_when_pay_num > 2) ~ BU + TD +
                              Both + 
                              no_rcpt_when_pay_uncommon_bl_avg + 
                              block_id + enum)

##### Tex Table ----
stargazer(h1_o3_norcpt,
          h1_o3_norcpt_ind_did1,
          h1_o3_norcpt_ind_did2,
          se = list(calc.ses.cluster(h1_o3_norcpt, "market", 
                                     data = vendor_end),
                    calc.ses.cluster(h1_o3_norcpt_ind_did1, "market", 
                                     data = vendor_all),
                    calc.ses.cluster(h1_o3_norcpt_ind_did2, "market", 
                                     data = vendor_end)),
          keep = c("BU", "TD", "Both", "Endline", "no_rcpt_when_pay_bl_avg"),
          column.labels = c("EL DIM", 
                            "BL-EL DID", "\\makecell{EL DID\\\\(Lagged DV)}"),
          font.size = "footnotesize",
          title = "H1: Outcome 3 - Tax Collector Does \\textbf{Not} Give You A Receipt When You Pay Fee",
          model.numbers = F,
          notes = c("All models have individuals as unit of analysis.",
                    "All include block fixed-effects.",
                    "\\makecell[r]{Endline only models include enumerator\\\\fixed-effects as well.}",
                    "All models have SEs clustered on market.",
                    "\\makecell[r]{Lagged DV model includes baseline\\\\market average of DV.}",
                    "Outcome is binary."),
          keep.stat = c("n", "adj.rsq"),
          dep.var.labels = c("No Receipt When Paying"),
          star.cutoffs = c(.05, .01, .001),
          notes.label = "Notes",
          label = "app:tab:no_rcpt_when_pay",
          dep.var.caption = '',
          header = F,
          out = paste0(out_path, "tableN7.tex"))

#### Table N8: Additional Receipt Robustness Models ----

##### Models ----
h1_o3_gotrcpt <- lm(data = vendor_end, 
                            got_rcpt_last_time ~ BU + TD +
                              Both + 
                              block_id + as.factor(enum))
h1_o3_keep_rcpts <- lm(data = vendor_end, 
                               typ_rcpt_keep_days  ~ BU + TD + Both + 
                                 block_id + as.factor(enum))
h1_o3_r_age_mod <- lm(data = vendor_end, 
                receipt_age ~ BU + TD + Both + 
                  block_id + as.factor(enum))


##### Tex Table ----
stargazer(h1_o3_r_age_mod,
          h1_o3_gotrcpt,
          h1_o3_keep_rcpts,
          se = list(calc.ses.cluster(h1_o3_r_age_mod, "market", 
                                     data = vendor_end),
                    calc.ses.cluster(h1_o3_gotrcpt, 
                                     "market", 
                                     data = vendor_end),
                    calc.ses.cluster(h1_o3_keep_rcpts,
                                     "market", 
                                     data = vendor_end)),
          keep = c("BU", "TD", "Both"),
          title = "H1: Outcome 3 - Additional Recent Receipt Variable Robustness Models",
          model.numbers = F,
          notes = c("Model only includes respondents who were able to present a receipt.",
                    "Model includes block fixed-effects.",
                    "SEs clustered on market."
          ),
          keep.stat = c("n", "adj.rsq"),
          dep.var.labels = c("\\makecell{Age of\\\\Actual Receipt}",
                             "\\makecell{Received Receipt\\\\Last Time Paid}",
                             "\\makecell{Number of Days\\\\Receipt Usually Kept}"),
          star.cutoffs = c(.05, .01, .001),
          notes.label = "Notes",
          label = "app:tab:rcp_age",
          dep.var.caption = '',
          header = F,
          out = paste0(out_path, "tableN8.tex"))

## Appendix N.2 Mechanism Outcomes ----


### Table N9 ----

#### Models ----
h4_1a <- lm(data = vendor_all, 
                     tr1_num ~ Endline + BU + TD + Both + Endline:BU +
                       Endline:TD + Endline:Both + block_id)
h4_1b <- lm(data = vendor_end,
                     tr1_num ~ BU + TD + Both + tr1_bl_avg + 
                       block_id + enum)
h4_2a <- lm(data = vendor_all, 
                     tr2_num ~ Endline + BU + TD + Both + Endline:BU +
                       Endline:TD + Endline:Both + block_id)
h4_2b <- lm(data = vendor_end,
                     tr2_num ~ BU + TD + Both + tr2_bl_avg + 
                       block_id + enum)
h5_3a <- lm(data = vendor_all, 
                       tr9e_num ~ Endline + BU + TD + Both + Endline:BU +
                         Endline:TD + Endline:Both + block_id)
h5_3b <- lm(data = vendor_end,
                       tr9e_num ~ BU + TD + Both + tr9e_bl_avg + 
                         block_id + enum)

#### Tex Table ----
stargazer(h4_1a,
          h4_1b,
          h4_2a,
          h4_2b,
          h5_3a,
          h5_3b,
          se = list(calc.ses.cluster(h4_1a, "market", 
                                     data = vendor_all),
                    calc.ses.cluster(h4_1b, "market", 
                                     data = vendor_end),
                    calc.ses.cluster(h4_2a, "market", 
                                     data = vendor_all),
                    calc.ses.cluster(h4_2b, "market", 
                                     data = vendor_end),
                    calc.ses.cluster(h5_3a, "market", 
                                     data = vendor_all),
                    calc.ses.cluster(h5_3b, "market", 
                                     data = vendor_end)),
          keep = c("BU", "TD", "Both", "Endline", "tr1_bl_avg" , "tr2_bl_avg",
                   "tr9e_bl_avg"),
          title = "Bottom-Up Causal Mechanism Outcomes: H4 - H5 - Individual-Level DID Results",
          model.numbers = F,
          column.labels = c("BL-EL DID", "EL DID (Lagged DV)", 
                            "BL-EL DID", "EL DID (Lagged DV)",
                            "BL-EL DID", "EL DID (Lagged DV)"),
          notes = c("All models include block fixed-effects.",
                    "Endline models include enumerator fixed-effects.",
                    "All models have SEs clustered on market.",
                    "Lagged DV models include market baseline average for DV.",
                    "All outcomes are on a 4-point scale."),
          keep.stat = c("n", "adj.rsq"),
          notes.label = "Notes:",
          dep.var.labels = c("Trust Local Gov.", "Trust in Ward Cllr.",
                             "DC Manages Funds Well",
                             "DC Transp. Using Funds",
                             "DC Transp. wrt. Collecting"), 
          font.size = "scriptsize",
          star.cutoffs = c(.05, .01, .001),
          header = F,
          model.names = T,
          out = paste0(out_path, "tableN9.tex"))

### Table N10 ----

#### Models ----
h6_1a <- lm(data = vendor_all, 
                       satisfaction_dev_num ~ Endline + BU + TD + Both + 
                         Endline:BU +
                         Endline:TD + Endline:Both + block_id)
h6_1b <- lm(data = vendor_end,
                       satisfaction_dev_num ~ BU + TD + Both + 
                         satisfaction_dev_bl_avg + 
                         block_id + enum)
# perceived amount of spending on services
h6_2a <- lm(data = vendor_all, 
                         tc2_10_clean ~ Endline + BU + TD + Both + 
                           Endline:BU +
                           Endline:TD + Endline:Both + block_id)
h6_2b <- lm(data = vendor_end,
                         tc2_10_clean ~ BU + TD + Both + 
                           tc2_10_bl_avg + 
                           block_id + enum)

#### Tex Table ----
stargazer(h6_1a,
          h6_1b,
          h6_2a,
          h6_2b,
          se = list(calc.ses.cluster(h6_1a, "market", 
                                     data = vendor_all),
                    calc.ses.cluster(h6_1b, "market", 
                                     data = vendor_end),
                    calc.ses.cluster(h6_2a, "market", 
                                     data = vendor_all),
                    calc.ses.cluster(h6_2b, "market", 
                                     data = vendor_end)),
          keep = c("BU", "TD", "Both", "Endline", "ms10_bl_avg" , "tc2_10_bl_avg"),
          title = "Bottom-Up Causal Mechanism Outcomes: H6 - Individual-Level DID Results",
          model.numbers = F,
          column.labels = c("BL-EL DID", "EL DID (Lagged DV)", 
                            "BL-EL DID", "EL DID (Lagged DV)"),
          notes = c("All models include block fixed-effects.",
                    "Endline models include enumerator fixed-effects as well.",
                    "All models have SEs clustered on market.",
                    "Lagged DV models include market baseline average of DV.",
                    "Outcome 1 is on a 4-point scale. Outcome 2 is a number out of 1000."),
          keep.stat = c("n", "adj.rsq"),
          notes.label = "Notes:",
          dep.var.labels = c("Services Satisfaction", 
                             "Percep. of Sp. on Services"), 
          font.size = "footnotesize",
          star.cutoffs = c(.05, .01, .001),
          header = F,
          model.names = T,
          out = paste0(out_path, "tableN10.tex"))

### Table N11 ----

#### Models ----
#ms1
h6_ms1 <- lm(data = vendor_end,
                     as.numeric(ms1_clean) ~ BU + TD + Both + block_id + 
                       as.factor(enum))
#ms3
h6_ms3 <- lm(data = vendor_end,
                     as.numeric(ms3_clean) ~ BU + TD + Both + block_id + 
                       as.factor(enum))
#ms4
h6_ms4 <- lm(data = vendor_end,
                     as.numeric(ms4_clean) ~ BU + TD + Both + block_id + 
                       as.factor(enum))
#ms5
h6_ms5 <- lm(data = vendor_end,
                     as.numeric(ms5_clean) ~ BU + TD + Both + block_id + 
                       as.factor(enum))

#ms6
h6_ms6 <- lm(data = vendor_end,
                     as.numeric(ms6_clean) ~ BU + TD + Both + block_id + 
                       as.factor(enum))

#### Tex Table ----
stargazer(h6_ms1,
          h6_ms3,
          h6_ms4,
          h6_ms5,
          h6_ms6,
          se = list(calc.ses.cluster(h6_ms1, "market", 
                                     data = vendor_end),
                    calc.ses.cluster(h6_ms3, "market", 
                                     data = vendor_end),
                    calc.ses.cluster(h6_ms4, "market", 
                                     data = vendor_end),
                    calc.ses.cluster(h6_ms5, "market", 
                                     data = vendor_end),
                    calc.ses.cluster(h6_ms6, "market", 
                                     data = vendor_end)),
          keep = c("BU", "TD", "Both"),
          title = "Bottom-Up Causal Mechanism Outcomes: H6 - Satisfaction with Specific Services",
          model.numbers = F,
          notes = c("Individual-level models include enumerator", 
                    "and block fixed-effects.",
                    "Individual-level models have SEs clustered",
                    "on market.",
                    "All outcomes are on a 4-point scale."),
          keep.stat = c("n", "adj.rsq"),
          notes.label = "Notes:",
          dep.var.labels = c("Clean Water Access",
                             "Garbage Collection",
                             "Condition of Paths",
                             "Condition of Stalls",
                             "Security"), 
          font.size = "footnotesize",
          star.cutoffs = c(.05, .01, .001),
          header = F,
          model.names = T,
          out = paste0(out_path, "tableN11.tex"))

### Table N12 ----

#### Models ----
#ms1
h6_ms1a <- lm(data = vendor_all,
                      ms1_num ~ Endline + BU + TD + Both + block_id + 
                        Endline:BU +
                        Endline:TD + Endline:Both)
h6_ms1b <- lm(data = vendor_end,
                      ms1_num ~ BU + TD + Both + 
                        ms1_bl_avg + 
                        block_id + enum)
#ms3
h6_ms3a <- lm(data = vendor_all,
                      ms3_num ~ Endline + BU + TD + Both + 
                        Endline:BU +
                        Endline:TD + Endline:Both + block_id)
h6_ms3b <- lm(data = vendor_end,
                      ms3_num ~ BU + TD + Both + 
                        ms3_bl_avg + 
                        block_id + enum)

#ms4
h6_ms4a <- lm(data = vendor_all,
                      ms4_num ~ Endline + BU + TD + Both + 
                        Endline:BU +
                        Endline:TD + Endline:Both + block_id)
h6_ms4b <- lm(data = vendor_end,
                      ms4_num ~ BU + TD + Both + 
                        ms4_bl_avg + 
                        block_id + enum)

#### Tex Table ----
stargazer(h6_ms1a,
          h6_ms1b,
          h6_ms3a,
          h6_ms3b,
          h6_ms4a,
          h6_ms4b,
          se = list(calc.ses.cluster(h6_ms1a, "market", 
                                     data = vendor_all),
                    calc.ses.cluster(h6_ms1b, "market", 
                                     data = vendor_end),
                    calc.ses.cluster(h6_ms3a, "market", 
                                     data = vendor_all),
                    calc.ses.cluster(h6_ms3b, "market", 
                                     data = vendor_end),
                    calc.ses.cluster(h6_ms4a, "market", 
                                     data = vendor_all),
                    calc.ses.cluster(h6_ms4b, "market", 
                                     data = vendor_end)),
          keep = c("BU", "TD", "Both", "Endline", "ms4_bl_avg",
                   "ms1_bl_avg", "ms3_bl_avg"),
          title = "Bottom-Up Causal Mechanism Outcomes: H6 - Satisfaction with Specific Services (Water Through Paths)",
          model.numbers = F,
          notes = c("All models include block fixed-effects.",
                    "Endline models include enumerator fixed-effects as well.",
                    "All models have SEs clustered on market.",
                    "Lagged DV models include market baseline average of DV.",
                    "All outcomes are on a 4-point scale."),
          column.labels = c("BL-EL DID", "EL DID (Lagged DV)", 
                            "BL-EL DID", "EL DID (Lagged DV)",
                            "BL-EL DID", "EL DID (Lagged DV)"),
          keep.stat = c("n", "adj.rsq"),
          star.cutoffs = c(.05, .01, .001),
          notes.label = "Notes:",
          dep.var.labels = c("Clean Water Access",
                             "Garbage Collection",
                             "Condition of Paths"), 
          font.size = "footnotesize",
          header = F,
          model.names = T,
          out = paste0(out_path, "tableN12.tex"))


### Table N13 ----

#### Models ----
h6_ms5_ind_did1 <- lm(data = vendor_all,
                      ms5_num ~ Endline + BU + TD + Both + 
                        Endline:BU +
                        Endline:TD + Endline:Both + block_id)
h6_ms5_ind_did2 <- lm(data = vendor_end,
                      ms5_num ~ BU + TD + Both + 
                        ms5_bl_avg + 
                        block_id + enum)
#ms6
h6_ms6_ind_did1 <- lm(data = vendor_all,
                      ms6_num ~ Endline + BU + TD + Both + 
                        Endline:BU +
                        Endline:TD + Endline:Both + block_id)
h6_ms6_ind_did2 <- lm(data = vendor_end,
                      ms6_num ~ BU + TD + Both + 
                        ms6_bl_avg + 
                        block_id + enum)

#### Tex Table ----
stargazer(h6_ms5_ind_did1,
          h6_ms5_ind_did2,
          h6_ms6_ind_did1,
          h6_ms6_ind_did2,
          se = list(calc.ses.cluster(h6_ms5_ind_did1, "market", 
                                     data = vendor_all),
                    calc.ses.cluster(h6_ms5_ind_did2, "market", 
                                     data = vendor_end),
                    calc.ses.cluster(h6_ms6_ind_did1, "market", 
                                     data = vendor_all),
                    calc.ses.cluster(h6_ms6_ind_did2, "market", 
                                     data = vendor_end)),
          keep = c("BU", "TD", "Both", "Endline", "ms5_bl_avg",
                   "ms6_bl_avg"),
          title = "Bottom-Up Causal Mechanism Outcomes: H6 - Satisfaction with Specific Services (Stall Condition and Security)",
          model.numbers = F,
          notes = c("All models include block fixed-effects.",
                    "Endline models include enumerator fixed-effects as well.",
                    "All models have SEs clustered on market.",
                    "Lagged DV models include market baseline average of DV.",
                    "All outcomes are on a 4-point scale."),
          keep.stat = c("n", "adj.rsq"),
          star.cutoffs = c(.05, .01, .001),
          notes.label = "Notes:",
          dep.var.labels = c("Condition of Stalls",
                             "Security"),
          column.labels = c("BL-EL DID", "EL DID (Lagged DV)", 
                            "BL-EL DID", "EL DID (Lagged DV)"),
          font.size = "footnotesize",
          header = F,
          model.names = T,
          out = paste0(out_path, "tableN13.tex"))

### Table N14 ----

#### Models ----
h7_1a <- lm(data = vendor_all, 
                          tc2_4b_num ~ Endline + BU + TD + Both + 
                            Endline:BU +
                            Endline:TD + Endline:Both + block_id)
h7_1b <- lm(data = vendor_end,
                          tc2_4b_num ~ BU + TD + Both + 
                            tc2_4b_bl_avg + 
                            block_id + enum)
h7_2a <- lm(data = vendor_all, 
                                    pay_even_disagree ~ Endline + BU + TD + Both + 
                                      Endline:BU +
                                      Endline:TD + Endline:Both + block_id)
h7_2b <- lm(data = vendor_end,
                                    pay_even_disagree ~ BU + TD + Both + 
                                      pay_even_disagree_bl_avg + 
                                      block_id + enum)

#### Tex Table ---- 
stargazer(h7_1a,
          h7_1b,
          h7_2a,
          h7_2b, 
          se = list(calc.ses.cluster(h7_1a, "market", 
                                     data = vendor_all),
                    calc.ses.cluster(h7_1b, "market", 
                                     data = vendor_end),
                    calc.ses.cluster(h7_2a, "market", 
                                     data = vendor_all),
                    calc.ses.cluster(h7_2b, "market", 
                                     data = vendor_end)),
          keep = c("BU", "TD", "Both", "Endline", "pay_even_disagree_bl_avg",
                   "tc2_4b_bl_avg"),
          title = "Bottom-Up Causal Mechanism Outcomes: H7 - Individual-Level DID Results",
          column.labels = c("BL-EL DID", "EL DID (Lagged DV)", 
                            "BL-EL DID", "EL DID (Lagged DV)"),
          model.numbers = F,
          notes = c("All models include block fixed-effects.",
                    "Endline models include enumerator fixed-effects as well.",
                    "All models have SEs clustered on market.",
                    "Lagged DV models include market baseline average of DV.",
                    "Outcome 1 is on a 4-point scale. Outcome 2 is dichotomous."),
          keep.stat = c("n", "adj.rsq"),
          notes.label = "Notes:",
          dep.var.labels = c("Paying Tax as Duty",
                             "Pay Tax Even if Disag. w. Gov."), 
          font.size = "footnotesize",
          header = F,
          star.cutoffs = c(.05, .01, .001),
          model.names = T,
          out = paste0(out_path, "tableN14.tex"))

### Table N15 ----



#### Models ----
h9_1a <- lm(data = vendor_all, 
                       tc5a_num ~ Endline + BU + TD + Both + 
                         Endline:BU +
                         Endline:TD + Endline:Both + block_id)
h9_1b <- lm(data = vendor_end,
                       tc5a_num ~ BU + TD + Both + 
                         tc5a_bl_avg + 
                         block_id + enum)
h9_2a <- lm(data = vendor_all, 
                       tc5b_num ~ Endline + BU + TD + Both + 
                         Endline:BU +
                         Endline:TD + Endline:Both + block_id)
h9_2b <- lm(data = vendor_end,
                       tc5b_num ~ BU + TD + Both + 
                         tc5b_bl_avg + 
                         block_id + enum)
h9_3a <- lm(data = vendor_all, 
                          tc2_15b_num ~ Endline + BU + TD + Both + 
                            Endline:BU +
                            Endline:TD + Endline:Both + block_id)
h9_3b <- lm(data = vendor_end,
                          tc2_15b_num ~ BU + TD + Both + 
                            tc2_15b_bl_avg + 
                            block_id + enum)

#### Tex Table ----
stargazer(h9_1a,
          h9_1b,
          h9_2a,
          h9_2b,
          h9_3a,
          h9_3b,
          se = list(calc.ses.cluster(h9_1a, "market", 
                                     data = vendor_all),
                    calc.ses.cluster(h9_1b, "market", 
                                     data = vendor_end),
                    calc.ses.cluster(h9_2a, "market", 
                                     data = vendor_all),
                    calc.ses.cluster(h9_2b, "market", 
                                     data = vendor_end),
                    calc.ses.cluster(h9_3a, "market", 
                                     data = vendor_all),
                    calc.ses.cluster(h9_3b, "market", 
                                     data = vendor_end)),
          keep = c("BU", "TD", "Both", "Endline", "tc5a_bl_avg", "tc5b_bl_avg",
                   "tc2_15b_bl_avg"),
          title = "Top-Down Causal Mechanisms Outcomes, Vendor Survey - Individual-Level DID Results",
          model.numbers = F,
          notes = c("All models include block fixed-effects.",
                    "Endline models include enumerator fixed-effects as well.",
                    "All models have SEs clustered on market.",
                    "Lagged DV models include market baseline average of DV.",
                    "All outcomes are on a 4-point scale."),
          keep.stat = c("n", "adj.rsq"),
          notes.label = "Notes:",
          dep.var.labels = c("Could Refuse to Pay", "Group Non-Comp. Poss.",
                             "Pay Because Consequences"),
          column.labels = c("BL-EL DID", "\\makecell{EL DID\\\\(Lagged DV)}", 
                            "BL-EL DID", "\\makecell{EL DID\\\\(Lagged DV)}",
                            "BL-EL DID", "\\makecell{EL DID\\\\(Lagged DV)}"),
          font.size = "footnotesize",
          star.cutoffs = c(.05, .01, .001),
          header = F,
          out = paste0(out_path, "tableN15.tex"))

### Table N16 ----

#### Models ----
h11_1a <- lm(data = tax_collector_all, 
                              hrs_in_mkt ~ Endline + BU + TD + Both + 
                                Endline:BU +
                                Endline:TD + Endline:Both + block_id)
h11_1b <- lm(data = tax_collector_end,
                              hrs_in_mkt ~ BU + TD + Both + 
                                hrs_in_mkt_bl_avg + 
                                block_id + as.factor(enum))
h11_2a <- lm(data = tax_collector_all, 
                                   vendors_visited_trim_99 ~ Endline + BU + TD + Both + 
                                     Endline:BU +
                                     Endline:TD + Endline:Both + block_id)
h11_2b <- lm(data = tax_collector_end,
                                   vendors_visited_trim_99 ~ BU + TD + Both + 
                                     vendors_visited_trim_99_bl_avg + 
                                     block_id + as.factor(enum))

#### Tex Table ----
stargazer(h11_1a,
          h11_1b,
          h11_2a,
          h11_2b,
          se = list(calc.ses.cluster(h11_1a, "market", 
                                     data = tax_collector_all),
                    calc.ses.cluster(h11_1b, "market", 
                                     data = tax_collector_end),
                    calc.ses.cluster(h11_2a, "market", 
                                     data = tax_collector_all),
                    calc.ses.cluster(h11_2b, "market", 
                                     data = tax_collector_end)),
          keep = c("BU", "TD", "Both", "Endline", "hrs_in_mkt_bl_avg", 
                   "vendors_visited_trim_99_bl_avg"),
          title = "Top-Down Causal Mechanisms Outcomes, Tax Collector Survey",
          model.numbers = F,
          notes = c("All models include block fixed-effects.",
                    "Endline models include enumerator fixed-effects as well.",
                    "All models have SEs clustered on market.",
                    "Lagged DV models include market baseline average of DV.",
                    "Outcome 1 is hours in a day. Outcome 2 is a positive integer."),
          keep.stat = c("n", "adj.rsq"),
          notes.label = "Notes:",
          dep.var.labels = c("Hours Working in Market A Day",
                             "Vendors Visited Per Day"),
          column.labels = c("BL-EL DID", "\\makecell{EL DID\\\\(Lagged DV)}", 
                            "BL-EL DID", "\\makecell{EL DID\\\\(Lagged DV)}"),
          star.cutoffs = c(.05, .01, .001),
          header = F,
          out = paste0(out_path, "tableN16.tex"))

# Appendix O: Multiple Hypothesis Testing Correction ---------------------------

## Appendix O.1: Main Paper Results ----

# defining variables we want to correct for
test_vars <- c("BU", "TD", "Both")

### Hypothesis 1 (Table 2 + Table N1) ----

# combining models into lists
h1_mods_ind <- list(h1_o1full,
                    h1_o2always,
                    h1_o3_r7)
h1_mods_mkt <- list(h1_o1full_mkt_did_diff,
                    h1_o2always_mkt_did_diff,
                    h1_o3_r7_mkt_did_diff) 
h1_ps_corr <- make_mhtc_table(h1_mods_ind, h1_mods_mkt,
                              test_vars = test_vars,
                              hypothesis = "H1",
                              outcomes_ind = c("Self", "Group", "Receipt"),
)

### BU ----

## Why are we correcting for TD here? We only care about BU treatments

BU_mech_vars <- c("BU", "Both")

#### Hypothesis 4 (Columns 1 and 2 of Table 3) ----
h4_mods_ind <- list(h4_dg,
                    h4_wc)
h4_ps_corr <- make_mhtc_table(h4_mods_ind,
                              hypothesis = "H4",
                              outcomes_ind = c("Trust in Local Gov.",
                                               "Trust in Ward Counc."),
                              test_vars = BU_mech_vars)
#### Hypothesis 5 (Columns 3-5 of Table 3) ----
h5_mods_ind <- list(h5_tr9e,
                    h5_tr9g,
                    h5_tr9h)
h5_ps_corr <- make_mhtc_table(h5_mods_ind,
                              hypothesis = "H5",
                              outcomes_ind = c("Dist. Manages Funds Well", 
                                               "Dist. Transp. Spending",
                                               "Dist. Transp. Tax Collection"),
                              test_vars = BU_mech_vars)

#### Hypothesis 6 (Columns 1-3 of Table 4) ----
h6_ps_corr <- list(h6_ms10,
                   h6_ms3,
                   h6_tc2_10) |> 
  make_mhtc_table(hypothesis = "H6",
                  outcomes_ind = c("Services Satisfaction",
                                   "Satisfaction with Water Access",
                                   "Percep. of Spending on Services"),
                  test_vars = BU_mech_vars)

#### Hypothesis 7 (Columns 4-5 of Table 4) ----
h7_ps_corr <- list(h7_taxduty,
                   h7_tax_even_disagree) |> 
  make_mhtc_table(hypothesis = "H7",
                  outcomes_ind = c("Paying Tax as Duty", "Tax Morale"),
                  test_vars = BU_mech_vars)

####Hypothesis 8 (Table 5) ----
h8_ps_corr <- list(h8_election3,
                   h8_p1_el,
                   h8_p2_el,
                   h8_st1_el,
                   h8_st2_el) |> 
  make_mhtc_table(hypothesis = "H8",
                  outcomes_ind = c("Vote", 
                                   "Petition Anon.",
                                   "Petition w. Name",
                                   "Agree St. 1",
                                   "Agree St. 2"),
                  test_vars = BU_mech_vars)

### TD ----

## Why are we correcting for BU here? we don't care about BU treatments here

TD_mech_vars <- c("TD", "Both")

#### Hypothesis 9 (Columns 1-3 of Table 6) ----
# PAP hypothesis 8
h9_ps_corr <- list(h9_tc5a,
                   h9_tc5b,
                   h9_tc2_15b) |> 
  make_mhtc_table(hypothesis = "H9",
                  outcomes_ind = c("Ind'l Evasion Possible",
                                   "Group Evasion Possible",
                                   "Pay Because Consequences"),
                  test_vars = TD_mech_vars)

#### Hypothesis 10 (Column 4 of Table 6) ----
# PAP hypothesis 9
h10_ps_corr <- list(h10_tc9) |> 
  make_mhtc_table(hypothesis = "H10",
                  outcomes_ind = c("Money Flowing to Gov't"),
                  test_vars = TD_mech_vars)

#### Hypothesis 11 (Table 7) ----
# PAP hypothesis 10
h11_ps_corr <- list(h11_hrs_in_mkt,
                    h11_vendors_visited) |> 
  make_mhtc_table(hypothesis = "H11",
                  outcomes_ind = c("Hours Working in Market A Day",
                                   "Vendors Visited Per Day"),
                  cluster_data = tax_collector_end,
                  test_vars = TD_mech_vars)

### Combining ----
ps_corr <- list_rbind(list(h1_ps_corr, h4_ps_corr, h5_ps_corr,
                           h6_ps_corr, h7_ps_corr, h8_ps_corr,
                           h9_ps_corr, h10_ps_corr, h11_ps_corr))

# getting subset that was originally significant
ps_corr_og_sig <- ps_corr |> 
  filter(p <= 0.05)

### Tex Table ---- 
ps_corr_og_sig <- ps_corr_og_sig |> 
  rename(`$p$` = p,
         `$p$ Holm` = p_holm,
         `$p$ BH` = p_BH,
         `Survives Holm` = survives_holm,
         `Survives BH` = survives_BH
  ) |> 
  select(-`$p$ BH`, -`Survives BH`)

print(xtable::xtable(ps_corr_og_sig, label = "app:tab:mhtc",
                     align = c("l", "p{0.1\\linewidth}", "p{0.2\\linewidth}", 
                               "p{0.05\\linewidth}", "p{0.1\\linewidth}",
                               "p{0.05\\linewidth}", #"p{0.05\\linewidth}",
                               "p{0.05\\linewidth}", #"p{0.075\\linewidth}",
                               "p{0.075\\linewidth}"),
                     caption = "Multiple Hypothesis Testing Correction",
                     digits = 3),
      comment = F,
      include.rownames = F,
      hline.after = 0:nrow(ps_corr_og_sig),
      sanitize.text.function = sanitize_allow_latex,
      sanitize.colnames.function = sanitize_allow_latex,
      caption.placement = "top",
      file = paste0(out_path, "tableO1.tex")
)  
 
## Appendix 0.2: Holm-Corrected Wald Tests ----

## NOTE: This code assumes that you have run the code for Appendix M, as that
## creates objects necessary for this appendix's code to run

### Table O2 ----
get_linhyps_ps_mods(
  list(r7_ind = h1_o3_r7,
       r7_did = h1_o3_r7_mkt_did_diff),
  lin_hyps = lin_hyps,
  cluster = list(vendor_end$market, NULL),
  correct = "overall"
) |> 
  print_linhyps_table(
    mod_names = main_mods_names,
    adj_only = TRUE,
    caption = "Holm-corrected $p$-Values for Linear Hypothesis (Wald) Tests for Receipt Outcome Models in Table 2",
    label = "app:tab:lin_hyps:main:holm",
    caption.placement = "top",
    include.rownames = FALSE,
    sanitize.text.function = sanitize_passthrough,
    size = "scriptsize",
    file = paste0(out_path, "tableO2.tex")
  )

### Table O3 ----
map(bu_mods_list1, \(x) get_linhyps_ps_mods(x, lin_hyps = lin_hyps,
                                            clusters = list(vendor_end$market),
                                            correct = "overall")) |> 
  list_rbind() |> 
  print_linhyps_table(
    mod_names = bu_mods_names1,
    adj_only = TRUE,
    caption = "Holm-corrected (Within Hypotheses H4 and H5) $p$-Values for Linear Hypothesis (Wald) Tests for Bottom-Up Mechanism Models in Table 3",
    label = "app:tab:lin_hyps:bu1:holm",
    caption.placement = "top",
    include.rownames = FALSE,
    sanitize.text.function = sanitize_passthrough,
    size = "scriptsize",
    file = paste0(out_path, "tableO3.tex")
  )

### Table O4 ----
map(bu_mods_list2, \(x) get_linhyps_ps_mods(x, lin_hyps = lin_hyps,
                                            clusters = list(vendor_end$market),
                                            correct = "overall")) |> 
  list_rbind() |> 
  print_linhyps_table(
    mod_names = bu_mods_names2,
    adj_only = TRUE,
    caption = "Holm-corrected (Within Hypotheses H6 and H7) $p$-Values for Linear Hypothesis (Wald) Tests for Bottom-Up Mechanism Models in Table 4",
    label = "app:tab:lin_hyps:bu2:holm",
    caption.placement = "top",
    include.rownames = FALSE,
    sanitize.text.function = sanitize_passthrough,
    size = "scriptsize",
    file = paste0(out_path, "tableO4.tex")
  )

### Table O5 ----
get_linhyps_ps_mods(h8_mods,
                    lin_hyps = lin_hyps,
                    clusters = list(vendor_end$market),
                    correct = "overall") |> 
  print_linhyps_table(
    mod_names = h8_mods_names,
    adj_only = TRUE,
    caption = "Holm-corrected $p$-Values for Linear Hypothesis (Wald) Tests for Political Engagement Outcome Models in Table 5",
    label = "app:tab:lin_hyps:pol-eng:holm",
    caption.placement = "top",
    include.rownames = FALSE,
    sanitize.text.function = sanitize_passthrough,
    size = "scriptsize",
    file = paste0(out_path, "tableO5.tex")
  )

### Table O6 ----
td_mods_h9_holm <- list(h9_tc5a = h9_tc5a,
                        h9_tc5b = h9_tc5b,
                        h9_tc2_15b = h9_tc2_15b) |> 
  get_linhyps_ps_mods(lin_hyps = lin_hyps,
                      clusters = list(vendor_end$market),
                      correct = "overall")

td_mods_h10_holm <- list(h10_tc9 = h10_tc9) |> 
  get_linhyps_ps_mods(lin_hyps = lin_hyps,
                      clusters = list(vendor_end$market),
                      correct = "overall")

td_mods_h11_holm <- list(h11_hrs_in_mkt = h11_hrs_in_mkt,
                         h11_vendors_visited = h11_vendors_visited) |> 
  get_linhyps_ps_mods(lin_hyps = lin_hyps,
                      clusters = list(tax_collector_end$market),
                      correct = "overall")

list(td_mods_h9_holm, td_mods_h10_holm, td_mods_h11_holm) |> 
  list_rbind() |> 
  print_linhyps_table(
    mod_names = td_mods_names,
    adj_only = TRUE,
    caption = "Holm-corrected (Within Hypotheses H8, H9, H10) $p$-Values for Linear Hypothesis (Wald) Tests for Top-Down Mechanism Models in Table 6-7",
    label = "app:tab:lin_hyps:td:holm",
    caption.placement = "top",
    include.rownames = FALSE,
    sanitize.text.function = sanitize_passthrough,
    size = "scriptsize",
    file = paste0(out_path, "tableO6.tex")
  )  
